This document outlines a functional Principal Component Analysis (FPCA) of the dynamic changes in the PC1 scores obtained from the PCA of the tongue-spline data.
library(fdapace)
library(ggplot2)
library(tidyverse)
library(brms)
library(scales)
library(grid)
library(gridExtra)
library(ggpubr)
library(ggsci)
library(emuR)
library(emmeans)
# Session setup: use the classic ggplot2 theme for all figures in this script
theme_set(theme_classic())
# Use all available cores for parallel model fitting (e.g. brms/Stan chains)
options(mc.cores = parallel::detectCores())
# Tongue spline tracking data -> loads `int.350` into the workspace
load(file = "data/int.350.rda")
# Participant info -> loads `par` into the workspace
load(file = "data/par.rda")
# select participants that are included in the analysis
# speaker IDs present in the tracking data (one row per spline sample)
sp <- int.350 %>%
  select(speaker)
# join participant metadata; `by = "speaker"` names the key explicitly on both
# sides (the original `by.x = "speaker"` left `by.y` implicit, which relies on
# merge()'s default column intersection and is fragile if `par` gains columns)
sp <- merge(sp, par, by = "speaker")
# N of speakers by L1
sp %>%
  group_by(L1) %>%
  summarise(n = n_distinct(speaker)) %>%
  ungroup()
## # A tibble: 3 Ă— 2
## L1 n
## <chr> <int>
## 1 English 13
## 2 Japanese 29
## 3 Polish, English 1
# Country
# speaker counts broken down by L1 and country of origin
sp %>%
group_by(L1, country) %>%
summarise(n = n_distinct(speaker)) %>%
ungroup()
## `summarise()` has grouped output by 'L1'. You can override using the `.groups`
## argument.
## # A tibble: 4 Ă— 3
## L1 country n
## <chr> <chr> <int>
## 1 English Canada 4
## 2 English US 9
## 3 Japanese Japan 29
## 4 Polish, English Poland 1
# fluency rating
# self-ratings (fluency / use / familiarity) summarised per language group;
# any non-Japanese L1 (including "Polish, English") is binned as "English"
sp %>%
mutate(
primary_lang = case_when(
L1 == "Japanese" ~ "Japanese",
TRUE ~ "English"
)
) %>%
group_by(primary_lang) %>%
summarise(
mean_fluency = mean(fluency),
sd_fluency = sd(fluency),
mean_use = mean(use),
sd_use = sd(use),
mean_familiarity = mean(familiarity),
sd_familiarity = sd(familiarity)
) %>%
ungroup()
## # A tibble: 2 Ă— 7
## primary_lang mean_fluency sd_fluency mean_use sd_use mean_familiarity
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 English 7 0 6.57 0.880 7
## 2 Japanese 3.71 1.04 3.66 1.08 3.87
## # ℹ 1 more variable: sd_familiarity <dbl>
## Japanese data
sp.jp <- sp %>%
filter(L1 == "Japanese")
## English study
# years of English study and months spent overseas for the Japanese speakers
# (one week counted as 0.25 months, per the original column name)
sp.jp %>%
rename(
overseas = `overseas (month: 1wk = 0.25m)`
) %>%
summarise(mean_study = mean(as.numeric(English_study)),
sd_study = sd(as.numeric(English_study)),
mean_month_overseas = mean(as.numeric(overseas)),
sd_month_overseas = sd(as.numeric(overseas)))
## mean_study sd_study mean_month_overseas sd_month_overseas
## 1 8.822862 2.064805 0.6463423 1.235134
# matching tongue data with participant info
int.350 <- merge(int.350, par, by.x = "speaker", by.y = "speaker") %>%
select(-number, -L1.y) %>%
rename(
L1 = L1.x
)
# N of speaker = 43
int.350 %>%
group_by(L1) %>%
summarise(speaker = n_distinct(speaker),
mean_age = mean(age),
sd_age = sd(age)) %>%
ungroup()
## # A tibble: 2 Ă— 4
## L1 speaker mean_age sd_age
## <chr> <int> <dbl> <dbl>
## 1 English 14 29.1 6.25
## 2 Japanese 29 19.6 0.941
# N of prompts
int.350 %>%
group_by(L1, segment, vowel) %>%
summarise(n = n_distinct(exclude_key)) %>%
ungroup()
## `summarise()` has grouped output by 'L1', 'segment'. You can override using the
## `.groups` argument.
## # A tibble: 15 Ă— 4
## L1 segment vowel n
## <chr> <chr> <chr> <int>
## 1 English /l/ /a/ 193
## 2 English /l/ /i/ 193
## 3 English /l/ /u/ 130
## 4 English /Éą/ /a/ 192
## 5 English /Éą/ /i/ 194
## 6 English /Éą/ /u/ 130
## 7 Japanese /l/ /a/ 295
## 8 Japanese /l/ /i/ 301
## 9 Japanese /l/ /u/ 197
## 10 Japanese /Éą/ /a/ 305
## 11 Japanese /Éą/ /i/ 301
## 12 Japanese /Éą/ /u/ 199
## 13 Japanese /Éľ/ /a/ 180
## 14 Japanese /Éľ/ /i/ 90
## 15 Japanese /Éľ/ /u/ 175
int.350 %>%
group_by(L1, segment) %>%
summarise(n = n_distinct(exclude_key)) %>%
ungroup()
## `summarise()` has grouped output by 'L1'. You can override using the `.groups`
## argument.
## # A tibble: 5 Ă— 3
## L1 segment n
## <chr> <chr> <int>
## 1 English /l/ 516
## 2 English /Éą/ 516
## 3 Japanese /l/ 793
## 4 Japanese /Éą/ 805
## 5 Japanese /Éľ/ 445
# PCA: -350ms onset ----------------------------------------------------------------
## Data Preparation
# keep only the analysis columns (speaker through country)
int.350 <- int.350 %>%
select(speaker:country)
### Convert data frame into a PCA-friendly format
# z-score the X/Y coordinates within each speaker so coordinate scales are
# comparable across speakers, then widen so each row is one spline frame with
# one column per tracked point (columns become X_z_<point> / Y_z_<point>)
int.350.xy <- int.350 %>%
group_by(speaker) %>%
mutate(
X_z = scale(X),
Y_z = scale(Y)
) %>%
ungroup() %>%
dplyr::select(-X, -Y) %>%
pivot_wider(
names_from = point_number,
values_from = c(X_z, Y_z)
)
### Check 1
# int.350.xy %>%
# filter(speaker == "3wy8us",
# prompt == "ram",
# repetition == "1") %>%
# group_by(speaker, prompt, repetition, time, interval_350, phone) %>%
# summarise() %>%
# ungroup() %>%
# print(n = Inf)
### Check 2
# int.350.xy %>%
# filter(speaker == "2d57ke") %>%
# group_by(speaker, prompt, repetition) %>%
# summarise() %>%
# ungroup() %>%
# print(n = Inf)
### Check 3
# int.350.xy %>%
# group_by(speaker) %>%
# summarise() %>%
# ungroup() %>%
# print(n = Inf)
### check column names
# colnames(int.350.xy)
### Remove and save meta data
# Columns that identify/describe each spline frame (everything except the
# X_z_* / Y_z_* coordinate columns). Defining the list once keeps the
# "drop for PCA" and "keep as metadata" selections from drifting apart
# (the original spelled the same 25 names out twice).
meta.cols.350 <- c("speaker", "rec_date", "time", "prompt", "L1", "frame_number",
                   "spline_number", "repetition", "segment", "phone", "position",
                   "interval_350", "vowel", "exclude_key", "start_time", "end_time",
                   "total_duration", "proportional_time", "vowel_start",
                   "vowel_start_prop", "acoustic_start", "acoustic_start_prop",
                   "gender", "age", "country")
# numeric coordinate columns only -> input to PCA
int.350.pca <- int.350.xy %>%
  dplyr::select(-dplyr::all_of(meta.cols.350))
# separate and save the meta data for later
meta.350 <- int.350.xy %>%
  dplyr::select(dplyr::all_of(meta.cols.350))
### Check if there are any NAs
table(is.na(int.350.pca))
##
## FALSE
## 3556476
# int.350.pca <- drop_na(int.350.pca) # run this only when the table(is.na()) returns TRUE values
### Run PCA on all the liquid tokens
# princomp() on the coordinate columns only (metadata removed above);
# components are interpreted later via their loadings and score trajectories
pca.350 <- princomp(int.350.pca)
summary(pca.350)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 0.6801958 0.6005386 0.34153349 0.28791377 0.24685574
## Proportion of Variance 0.3927849 0.3061743 0.09902695 0.07037391 0.05173367
## Cumulative Proportion 0.3927849 0.6989591 0.79798608 0.86836000 0.92009367
## Comp.6 Comp.7 Comp.8 Comp.9 Comp.10
## Standard deviation 0.17905382 0.16737926 0.11178715 0.08286500 0.064901130
## Proportion of Variance 0.02721786 0.02378429 0.01060891 0.00582947 0.003575949
## Cumulative Proportion 0.94731153 0.97109582 0.98170472 0.98753419 0.991110142
## Comp.11 Comp.12 Comp.13 Comp.14
## Standard deviation 0.057522053 0.040252630 0.0338309405 0.0298843284
## Proportion of Variance 0.002809025 0.001375547 0.0009716615 0.0007581826
## Cumulative Proportion 0.993919167 0.995294714 0.9962663753 0.9970245580
## Comp.15 Comp.16 Comp.17 Comp.18
## Standard deviation 0.0280492532 0.0251609230 0.0237700494 0.0211586285
## Proportion of Variance 0.0006679277 0.0005374524 0.0004796749 0.0003800685
## Cumulative Proportion 0.9976924856 0.9982299380 0.9987096129 0.9990896814
## Comp.19 Comp.20 Comp.21 Comp.22
## Standard deviation 0.018903311 0.0177535200 0.0158079519 0.0122418144
## Proportion of Variance 0.000303363 0.0002675813 0.0002121476 0.0001272267
## Cumulative Proportion 0.999393044 0.9996606257 0.9998727733 1.0000000000
# Summarise to see how much variation each PCA accounts for
### Plotting the variance explained (optional)
# proportion of total variance per component; build the tibble in one pass
# (the original converted with as_tibble() twice, which was redundant, and
# as_tibble() on a bare vector is deprecated in modern tibble anyway)
var.explained.350 <- tibble(value = as.numeric(pca.350$sdev^2 / sum(pca.350$sdev^2))) %>%
  mutate(PC = row_number())
# create a plot
var.explained.350.PC10 <- var.explained.350 %>%
filter(PC < 11) # only plot PC10 or below
# scree-style plot: numeric labels + boxed PC numbers for PC1-PC4, plain
# points for PC6 and above, dotted reference line at the 5% threshold.
# NOTE(review): PC5 gets neither a label (`PC < 5`) nor a point (`PC > 5`) --
# possibly `PC > 5` was meant to be `PC >= 5`; confirm against the figure
var.plot.350 <- var.explained.350.PC10 %>%
ggplot(mapping = aes(x = PC, y = value)) +
geom_line() +
geom_text(data = subset(var.explained.350.PC10, PC < 5), aes(label = round(value, digits = 5)), nudge_x = 0.8) +
geom_label(data = subset(var.explained.350.PC10, PC < 5), aes(label = PC), label.padding = unit(0.40, "lines")) +
geom_point(data = subset(var.explained.350.PC10, PC > 5)) +
geom_hline(yintercept = 0.05, linetype = 'dotted') +
xlab("Principal Component") +
ylab("Variance Explained") +
ggtitle("Proportion of Variance explained by each PC") +
# ylim(0, 0.6) +
theme_classic() +
theme(plot.title = element_text(size = 18, face = "bold"),
axis.text = element_text(size = 12),
axis.title = element_text(size = 15),
strip.text.x = element_text(size = 15),
strip.text.y = element_text(size = 15, angle = 0),
legend.text = element_text(size = 12),
legend.position = "bottom",
legend.key.width = unit(3, "cm")
# legend.title = element_blank()
)
var.plot.350
# the plot shows that the first 4 PCs account for more than 5% of the variation in the data
ggsave(var.plot.350, filename = "figure/varplot_350ms.png", width = 10, height = 5, dpi = 1000)
## Preparing PCA plots
# Get the results of the PCA which are useful
pca.number.350 <- pca.350$scores
# Put it into a sensible format as some variables come out weird
pca.number.350 <- as_tibble(pca.number.350)
# Combine with non-numeric information from earlier
# (cbind relies on meta.350 and the score matrix sharing the same row order,
# which holds because both were derived from int.350.xy without reordering)
pca.result.350 <- cbind(meta.350, pca.number.350)
# normalise PCs by speaker for comparison
# z-score the first four component scores within speaker so speakers with
# different score ranges are comparable. Uses `<-` (the original used `=`)
# and ungroups afterwards so the speaker grouping does not silently leak
# into later pipelines that use this data frame.
pca.result.350 <- pca.result.350 %>%
  group_by(speaker) %>%
  mutate(
    PC1z = scale(Comp.1),
    PC2z = scale(Comp.2),
    PC3z = scale(Comp.3),
    PC4z = scale(Comp.4)
  ) %>%
  ungroup()
## Work out parameters of variation in first 3 PCs
# Mean values from the output of the PCA
# pca.350$center holds the per-column means; enframe -> (name, value) tibble
mean.pca.350 <- tibble::enframe(pca.350$center)
## make subsettable variable
# the first character of the column name encodes the axis ("X_z_*" / "Y_z_*")
mean.pca.350 <- mean.pca.350 %>%
mutate(axis = substr(mean.pca.350$name, 1, 1))
## subset data to make into a matrix of x and y values
X <- subset(mean.pca.350, mean.pca.350$axis == 'X')
Y <- subset(mean.pca.350, mean.pca.350$axis == 'Y')
# X and Y rows are matched by position (same point ordering in both halves)
mean.pca.350 <- cbind(X, Y)
## changing colnames
colnames(mean.pca.350) = c("number1", "mean.x", "axis1", "number2", "mean.y", "axis2")
## get loadings - eigenvectors
loadings.350 <- as.table(pca.350$loadings)
# PC1-PC4 loadings ---------------------------------------------------------
# helper: pull the loadings for one component out of the loadings table and
# return a two-column data frame (<prefix>.x, <prefix>.y), matching X rows to
# Y rows by position. Replaces four near-identical copy/paste blocks that
# renamed columns via throwaway "useless" names and then deleted them.
get_pc_loadings_350 <- function(loadings_tbl, comp, prefix) {
  long <- as.data.frame(loadings_tbl)       # columns: Var1, Var2, Freq
  long <- long[long$Var2 == comp, ]
  # the first character of the variable name encodes the axis ("X..."/"Y...")
  axis <- substr(as.character(long$Var1), 1, 1)
  out <- data.frame(
    x = long$Freq[axis == "X"],
    y = long$Freq[axis == "Y"]
  )
  colnames(out) <- paste0(prefix, c(".x", ".y"))
  out
}
## get loadings for PC1-PC4 in a sensible format
PC1.l.350 <- get_pc_loadings_350(loadings.350, "Comp.1", "PC1.l.350")
PC2.l.350 <- get_pc_loadings_350(loadings.350, "Comp.2", "PC2.l.350")
PC3.l.350 <- get_pc_loadings_350(loadings.350, "Comp.3", "PC3.l.350")
PC4.l.350 <- get_pc_loadings_350(loadings.350, "Comp.4", "PC4.l.350")
## Plotting the meaning of PCs
# bind together all of the above (columns: PCk.l.350.x, PCk.l.350.y, k = 1..4)
loadings.350 <- cbind(PC1.l.350, PC2.l.350, PC3.l.350, PC4.l.350)
# get sds of first 4 PCs
sd.350 <- tibble::enframe(pca.350$sdev)
sd_PC1.350 <- as.numeric(sd.350[1,2])
sd_PC2.350 <- as.numeric(sd.350[2,2])
sd_PC3.350 <- as.numeric(sd.350[3,2])
sd_PC4.350 <- as.numeric(sd.350[4,2])
# calculate estimated values including sd
# midpoint model: mean tongue shape +/- 1 sd along each component's loadings.
# One loop replaces 16 copy/pasted assignments; column creation order matches
# the original (PCk.max.x, PCk.min.x, PCk.max.y, PCk.min.y for k = 1..4).
estimate.350 <- cbind(mean.pca.350, loadings.350)
for (k in 1:4) {
  sd_k <- c(sd_PC1.350, sd_PC2.350, sd_PC3.350, sd_PC4.350)[k]
  l.x <- estimate.350[[paste0("PC", k, ".l.350.x")]]
  l.y <- estimate.350[[paste0("PC", k, ".l.350.y")]]
  estimate.350[[paste0("PC", k, ".max.x")]] <- estimate.350$mean.x + sd_k * l.x
  estimate.350[[paste0("PC", k, ".min.x")]] <- estimate.350$mean.x - sd_k * l.x
  estimate.350[[paste0("PC", k, ".max.y")]] <- estimate.350$mean.y + sd_k * l.y
  estimate.350[[paste0("PC", k, ".min.y")]] <- estimate.350$mean.y - sd_k * l.y
}
# Make figures ------------------------------------------------------------
# helper: tongue-shape plot for one PC -- mean curve (solid), mean + 1 sd
# (dashed path with "+" markers) and mean - 1 sd (dotted path with minus
# markers). Replaces four near-identical ggplot blocks; column names are
# looked up with the `.data` pronoun so one function serves all PCs.
plot_pc_shape_350 <- function(est, k) {
  max.x <- paste0("PC", k, ".max.x")
  max.y <- paste0("PC", k, ".max.y")
  min.x <- paste0("PC", k, ".min.x")
  min.y <- paste0("PC", k, ".min.y")
  ggplot() +
    geom_path(data = est, aes(x = mean.x, y = mean.y), linewidth = 1.5) +
    geom_path(data = est, aes(x = .data[[max.x]], y = .data[[max.y]]), linewidth = 1, alpha = 0.5, linetype = "dashed") +
    geom_path(data = est, aes(x = .data[[min.x]], y = .data[[min.y]]), linewidth = 1, alpha = 0.5, linetype = "dotted") +
    geom_point(data = est, aes(x = .data[[max.x]], y = .data[[max.y]]), shape = 3, size = 3, stroke = 2) +
    geom_point(data = est, aes(x = .data[[min.x]], y = .data[[min.y]]), shape = "\u2212", size = 5, stroke = 8) +
    xlab("X") + ylab("Y") +
    ggtitle(paste0("PC", k)) +
    theme_classic() +
    # ylim(-35, 25) +
    theme(plot.title = element_text(size = 15, hjust = 0.5, vjust = 1.5, face = "bold"),
          legend.position = "top",
          legend.title = element_blank(),
          axis.text = element_text(size = 15),
          axis.title = element_text(size = 15),
          strip.text.x = element_text(size = 15)
    )
}
PC1.350.plot <- plot_pc_shape_350(estimate.350, 1)
PC2.350.plot <- plot_pc_shape_350(estimate.350, 2)
PC3.350.plot <- plot_pc_shape_350(estimate.350, 3)
PC4.350.plot <- plot_pc_shape_350(estimate.350, 4)
# Publication plot
pca_meaning_all.350 <- grid.arrange(PC1.350.plot, PC2.350.plot, PC3.350.plot, PC4.350.plot, ncol = 2)
# fdapace input: IDs = token column; tVec = time column; yVec = variable column(s)
# (the original line began with a stray `fdapace` token before the comment,
# which would error at run time with "object 'fdapace' not found")
input.PC1 <- fdapace::MakeFPCAInputs(IDs = pca.result.350$exclude_key, tVec = pca.result.350$proportional_time, yVec = pca.result.350$PC1z)
# Check if there's any issues with the data
fdapace::CheckData(input.PC1$Ly, input.PC1$Lt)
# No errors have been returned, so let's now run fPCA on the dynamic PC1 trajectory
PC1 <- fdapace::FPCA(Ly = input.PC1$Ly, Lt = input.PC1$Lt)
# saving the FPC1 output
save(PC1, file = "data/PC1_FPCA_BAAP.rda")
# understanding FPC1/PC1
## eigenvalues
# variance captured by each functional PC
PC1$lambda
## [1] 36.3817095 15.4263673 7.1191478 3.0323552 0.7206187
## the cumulative percentage of variance explained by the eigenvalue
PC1$cumFVE
## [1] 0.5793283 0.8249719 0.9383345 0.9866205 0.9980954
# per-component shares below are successive differences of cumFVE
## fPC1: 0.5793283
## fPC2: 0.2456436
## fPC3: 0.1133626
## fPC4: 0.048286
## PC scores -> each row is 1 token, each column is one PC
# PC1$xiEst
## plot
# default fdapace diagnostic panel for the fit
plot(PC1)
## scree plot
# CreateScreePlot(PC1)
## path plot
# CreatePathPlot(PC1, xlab = "normalised time", ylab = "PC1 (tongue body movement)")
# the input data to FPCA() (just in case you want to check the specific input data you used)
# PC1$inputData$Lt
# load the fPCA results for PC1
# (re-loads the object saved above, so this section can run without re-fitting)
load(file = "data/PC1_FPCA_BAAP.rda")
# function: get PC scores + return data frame with PCs for each token
# Takes an FPCA fit and returns one row per token: the token id (taken from
# the names of the input time lists) followed by one column per estimated
# functional PC score, named PC1..PCn (n = number of eigenvalues).
get_pc_scores <- function(fpcaObj){
  n_pcs <- length(fpcaObj$lambda)            # one score column per eigenvalue
  scores <- as.data.frame(fpcaObj$xiEst)     # score matrix -> data frame
  out <- data.frame(names(fpcaObj$inputData$Lt), scores)
  names(out) <- c("exclude_key", paste0("PC", seq_len(n_pcs)))
  out
}
# get PC scores w/ token info
pc1_df <- get_pc_scores(PC1)
# join PCs (dat) with selected cols from original data frame
## store meta info
meta <- pca.result.350 %>%
select(speaker, L1, prompt, segment, vowel, exclude_key)
## merge the list and meta data - unique(meta) because otherwise there would be lots of duplicates
# (meta has one row per time sample; unique() collapses it to one row per token)
dat.PC1 <- left_join(pc1_df, unique(meta), by = "exclude_key")
# PC scores clustering per group
# fPC1 x fPC2 scatter with 95% ellipses per adjacent vowel, faceted
# segment x L1; the segment filter keeps /l/ and /Éą/ tokens only
PC1.scatter <- dat.PC1 %>%
filter(segment %in% c("/l/", "/Éą/")) %>%
# filter(group %in% c("advanced", "English")) %>%
ggplot2::ggplot() +
aes(x = PC1, y = PC2, colour = vowel, shape = vowel) +
geom_point(alpha = 0.3, size = 2, show.legend = FALSE) +
stat_ellipse(aes(color = vowel, linetype = vowel), level = 0.95, lwd = 1.2) +
facet_grid(segment ~ L1) +
scale_color_manual(values = c("blue4", "brown4", "darkolivegreen")) +
# ggtitle("Tongue body movement (PC1) for English and Japanese liquids") +
guides(linetype = "none") +
theme_classic() +
theme(legend.text = element_text(size = 30),
legend.key.size = unit(1, 'cm'),
legend.position = "bottom",
legend.title = element_text(size = 30),
axis.text = element_text(size = 15),
axis.title = element_text(size = 20),
plot.title = element_text(size = 20, hjust = 0.5, face = "bold"),
strip.text.x = element_text(size = 30),
strip.text.y = element_text(angle = 0, size = 30)
) +
# ylim(c(-4, 4)) +
labs(x = "FPC1", y = "FPC2", colour = "Adjacent vowel")
PC1.scatter
ggsave(PC1.scatter, filename = "figure/PC1_scatter.png", width = 15, height = 10, dpi = 1000)
## PC1
# Dynamic PC1 trajectories: one faint path per recording plus a per-vowel
# smooth (solid) and its GAM ribbon outline (dotted), faceted segment x L1.
# NOTE: the original piped through group_by(speaker, prompt, repetition)
# before ggplot(); ggplot2 ignores dplyr grouping, so that no-op is removed.
PC1.dyn.350 <- pca.result.350 %>%
  ggplot() +
  geom_path(aes(x = proportional_time, y = PC1z, colour = vowel, group = rec_date), alpha = 0.03) +
  # geom_smooth(colour = "black", linewidth = 3, se = FALSE, show.legend = TRUE) +
  geom_smooth(aes(x = proportional_time, y = PC1z, colour = vowel, group = vowel), linewidth = 2, se = FALSE, show.legend = TRUE) +
  stat_smooth(aes(x = proportional_time, y = PC1z, colour = vowel, group = vowel), method = "gam", geom = "ribbon", fill = NA, linewidth = 0.5, linetype = 3, show.legend = FALSE) +
  # facet_wrap(segment ~ vowel, ncol = 2) +
  facet_grid(segment ~ L1) +
  # ggtitle("PC1 (Onset: -350 ms)") +
  geom_hline(yintercept = 0, linetype = 1, linewidth = 0.1) +
  # dashed verticals: mean vowel onset and mean acoustic liquid onset
  geom_vline(aes(xintercept = mean(vowel_start_prop)), linetype = 2, linewidth = 0.5) +
  # geom_text(aes(x = mean(vowel_start_prop), y = 1.3), label = "vowel\nonset", colour = "Black", size = 5) +
  geom_vline(aes(xintercept = mean(acoustic_start_prop)), linetype = 2, linewidth = 0.5) +
  # geom_text(aes(x = mean(acoustic_start_prop), y = 1.3), label = "liquid\nonset", colour = "Black", size = 5) +
  scale_color_manual(values = c("blue4", "brown4", "darkolivegreen")) +
  theme_classic() +
  theme(legend.text = element_text(size = 30),
        legend.key.size = unit(1, 'cm'),
        legend.position = "bottom",
        legend.title = element_text(size = 30),
        axis.text = element_text(size = 15),
        axis.title = element_text(size = 20),
        plot.title = element_text(size = 20, hjust = 0.5, face = "bold"),
        strip.text.x = element_text(size = 20),
        strip.text.y = element_text(angle = 0, size = 20)
  ) +
  ylim(c(-4, 4)) +
  labs(x = "Proportional time (%)", y = "PC1 (z-score)", colour = "Adjacent vowel")
PC1.dyn.350
ggsave(PC1.dyn.350, filename = "figure/dynamic_PC1.jpg", width = 15, height = 10, dpi = 1000)
# function: define perturbation function (±Q = ±sd, k = PC number)
# Returns the mean trajectory shifted by Q standard deviations (sqrt of the
# k-th eigenvalue) along the k-th eigenfunction.
perturbation <- function(fpcaObj, Q, k){
  shift <- Q * sqrt(fpcaObj$lambda[k]) * fpcaObj$phi[, k]
  fpcaObj$mu + shift
}
# function: create perturbation object with mean and ±Q sd as a data frame (for one PC only)
# can validate against fdapace::GetMeanCurve and fdapace::CreateModeOfVarPlot
perturbation_object <- function(fpcaObj, Q, k){
  out <- data.frame(
    time = fpcaObj$workGrid,                # grid of time values
    mean = fpcaObj$mu,                      # mean trajectory
    Qplus = perturbation(fpcaObj, Q, k),    # +Q sd
    Qminus = perturbation(fpcaObj, -Q, k)   # -Q sd
  )
  out$PC <- paste0("PC", k)                 # label identifying the PC
  out
}
# function: create perturbation data frame with mean and ±Q sd (for all PCs)
# to do: add ability to pass list of Q values for gradient perturbation function
get_perturbation <- function(fpcaObj, Q){
  per_pc <- lapply(
    seq_along(fpcaObj$lambda),  # one block per estimated PC
    perturbation_object,
    fpcaObj = fpcaObj,
    Q = Q
  )
  dplyr::bind_rows(per_pc)      # stack the per-PC data frames into one
}
# get mean trajectory and ±2 sd for all PCs
p_PC1 <- get_perturbation(PC1, Q = 2)
# Manually calculating proportional time for liquid onset and offset for plotting
# (mean acoustic liquid onset and mean vowel onset, in proportional time)
pca.result.350 %>%
ungroup() %>%
summarise(mean_start = mean(acoustic_start_prop),
mean_end = mean(vowel_start_prop))
## # A tibble: 1 Ă— 2
## mean_start mean_end
## <dbl> <dbl>
## 1 56.7 68.2
# plot data, perturbation + PC scores ------------------------------------
# perturbation plot
# mean curve with +2 sd ("+" red markers) and -2 sd ("-" blue markers) for
# fPC1 and fPC2; dashed verticals mark mean liquid and vowel onsets
pc1_perturbation <- p_PC1 %>%
filter(PC %in% c("PC1", "PC2")) %>%
mutate(
fPC = case_when(
PC == "PC1" ~ "fPC1",
PC == "PC2" ~ "fPC2"
)
) %>%
ggplot2::ggplot() +
aes(x = time, y = mean) +
geom_path() +
geom_point(aes(y = Qplus), shape = 3, size = 3, colour = "red") +
geom_point(aes(y = Qminus), shape = 95, size = 5, colour = "blue") +
xlab("Proportional Time (%)") +
ylab("PC") +
geom_vline(data = pca.result.350, aes(xintercept = mean(acoustic_start_prop)), linetype = 2) +
geom_vline(data = pca.result.350, aes(xintercept = mean(vowel_start_prop)), linetype = 2) +
facet_wrap(~ fPC, ncol = 2) +
scale_color_manual(values = c("blue4", "brown4", "darkolivegreen")) +
theme_classic() +
theme(legend.text = element_text(size = 30),
legend.key.size = unit(1, 'cm'),
legend.position = "bottom",
legend.title = element_text(size = 30),
axis.text = element_text(size = 15),
axis.title = element_text(size = 20),
plot.title = element_text(size = 20, hjust = 0.5, face = "bold"),
strip.text.x = element_text(size = 20),
strip.text.y = element_text(angle = 0, size = 20)
) +
labs(x = "Proportional Time(%)", y = "fPC")
pc1_perturbation
ggsave(pc1_perturbation, filename = "figure/perturbation_PC1.jpg", width = 15, height = 5, dpi = 1000)
# mean fPC1 trajectory
# pc1_mean_curve <- fdapace::GetMeanCurve(Ly = input.PC1$Ly, Lt = input.PC1$Lt, optns = list(plot = TRUE))
pc1_mu_values <- data.frame(PC1$mu) # mean curve values
pc1_mu_time <- data.frame(PC1$workGrid) # timepoints used for estimating the curve
pc1_phi <- data.frame(PC1$phi) # eigenfunction at each timepoint: workGrid * nlambda (e.g., 255 = 51 workGrid * 5 lambda)
pc1_lambda <- data.frame(PC1$lambda) # PC loadings for each PC: currently 5
# create a data frame containing mean curve, time and eigenfunctions associated with each PC at each time point
## add an extra column 'col_number' as a common index across the data frames - useful when merging everything together later on
### mean curve
pc1_mu_values <- pc1_mu_values %>%
mutate(
col_number = row_number()
)
### sampling time points
pc1_mu_time <- pc1_mu_time %>%
mutate(
col_number = row_number()
)
### eigenfunction
pc1_phi <- pc1_phi %>%
mutate(
col_number = row_number()
)
### pc loadings
# reshape the eigenvalues to wide (one PCk_lambda column per PC) and repeat
# the single row once per time point so it can be joined to the curves below.
# The repeat count is taken from the workGrid length (nrow(pc1_mu_time))
# rather than hard-coding 51, so it stays correct if FPCA's grid changes.
pc1_lambda <- pc1_lambda %>%
  mutate(
    PC = str_c("PC", row_number()),
    PC = str_c(PC, "lambda", sep = "_")
  ) %>%
  pivot_wider(names_from = "PC", values_from = "PC1.lambda") %>%
  slice(rep(1:n(), each = nrow(pc1_mu_time))) %>%
  mutate(
    col_number = row_number()
  )
## merging all data together one by one
# all joins are keyed on the shared row index col_number (1..length(workGrid))
PC1.rec <- left_join(pc1_mu_values, pc1_mu_time, by = "col_number")
PC1.rec <- left_join(PC1.rec, pc1_phi, by = "col_number")
PC1.rec <- left_join(PC1.rec, pc1_lambda, by = "col_number")
## tidying up some column names
# X1..X5 are the default data.frame names for the 5 eigenfunction columns
PC1.rec <- PC1.rec %>%
select(col_number, PC1.workGrid, PC1.mu, X1, X2, X3, X4, X5, PC1_lambda, PC2_lambda, PC3_lambda, PC4_lambda, PC5_lambda) %>%
rename(
mean = PC1.mu,
time = PC1.workGrid,
PC1_eigen = X1,
PC2_eigen = X2,
PC3_eigen = X3,
PC4_eigen = X4,
PC5_eigen = X5
)
## plotting the eigenfunctions - this should match with a sub-plot in bottom right created with plot(PC1)
# sanity check: fPC1 (black solid), fPC2 (red dashed), fPC3 (green dotted)
PC1.rec %>%
ggplot() +
# geom_path(aes(x = time, y = mean)) +
geom_path(aes(x = time, y = PC1_eigen), colour = "black", linewidth = 1.5) +
geom_path(aes(x = time, y = PC2_eigen), colour = "red", linetype = 2, linewidth = 1.5) +
geom_path(aes(x = time, y = PC3_eigen), colour = "darkgreen", linetype = 3, linewidth = 1.5) +
# geom_path(aes(x = time, y = value, colour = pc)) +
geom_hline(yintercept = 0) +
labs(x = "time", y = "eigenfunctions", title = "First 3 eigenfunctions")
## check if this matches plot(PC1)
plot(PC1)
# PC scores -> each row is 1 token, each column is one PC
# head(PC1$xiEst)
# PC scores have already been added to the main data set
# head(dat.PC1)
# duplicate each token row once per reconstruction time point; the count is
# taken from the reconstruction grid (nrow(PC1.rec)) rather than hard-coding
# 51, so it stays correct if FPCA's workGrid length changes
dat.PC1.time <- dat.PC1 %>%
  slice(rep(1:n(), each = nrow(PC1.rec)))
# add col_names to merge with the other data frame
# (within each token, col_number runs 1..n and indexes the time points)
dat.PC1.time <- dat.PC1.time %>%
  group_by(exclude_key) %>%
  mutate(
    col_number = row_number()
  ) %>%
  ungroup()
# merge: many token rows to one grid row per col_number
dat.PC1.time <- left_join(dat.PC1.time, PC1.rec, by = "col_number")
# /l/ and /Éą/ tokens only, used for the onset reference lines below
pca.result.350.BAAP <- pca.result.350 %>%
filter(segment %in% c("/l/", "/Éą/"))
# single-component reconstruction: score * eigenfunction + mean curve,
# computed for each of the 5 fPCs (only PC1_reconstruct is plotted here)
rec.PC1.fPC1 <- dat.PC1.time %>%
mutate(
PC1_reconstruct = PC1 * PC1_eigen + mean,
PC2_reconstruct = PC2 * PC2_eigen + mean,
PC3_reconstruct = PC3 * PC3_eigen + mean,
PC4_reconstruct = PC4 * PC4_eigen + mean,
PC5_reconstruct = PC5 * PC5_eigen + mean,
) %>%
mutate(
language = case_when(
L1 == "English" ~ "L1 English",
L1 == "Japanese" ~ "L1 Japanese"
)
) %>%
# group_by(exclude_key) %>%
filter(segment %in% c("/l/", "/Éą/")) %>%
ggplot() +
geom_path(aes(x = time, y = PC1_reconstruct, group = exclude_key, colour = vowel), alpha = 0.2, show.legend = TRUE) +
# geom_smooth(aes(x = time, y = PC1_reconstruct, group = vowel, colour = vowel)) +
scale_color_manual(values = c("blue3", "brown2", "darkolivegreen")) +
labs(x = "Proportional Time (%)", y = "Reconstructed PC1 scores from FPC1") +
labs(title = "FPC1 for PC1") +
geom_hline(yintercept = 0, linetype = 1, linewidth = 0.1) +
# dashed verticals: mean vowel onset and mean acoustic liquid onset
geom_vline(data = pca.result.350.BAAP, aes(xintercept = mean(vowel_start_prop)), linetype = 2, linewidth = 0.5) +
# geom_text(data = pca.result.350.BAAP, aes(x = mean(vowel_start_prop)+9, y = 1.5), label = "vowel\nonset", colour = "Black", size = 10) +
geom_vline(data = pca.result.350.BAAP, aes(xintercept = mean(acoustic_start_prop)), linetype = 2, linewidth = 0.5) +
# geom_text(data = pca.result.350.BAAP, aes(x = mean(acoustic_start_prop)-9, y = 1.5), label = "liquid\nonset", colour = "Black", size = 10) +
guides(colour = guide_legend(override.aes = list(alpha = 1))) +
facet_grid(segment ~ language) +
theme_classic() +
theme(legend.text = element_text(size = 30),
legend.key.size = unit(2, 'cm'),
legend.position = "bottom",
legend.title = element_text(size = 30),
axis.text = element_text(size = 15),
axis.title = element_text(size = 20),
# plot.title = element_text(size = 20, hjust = 0, face = "bold"),
plot.title = element_blank(),
strip.text.x = element_text(size = 30),
strip.text.y = element_text(angle = 0, size = 30)
)
ggsave(rec.PC1.fPC1, filename = "figure/reconstructed_PC1_fPC1.jpg", width = 15, height = 10, dpi = 300)
# raw data and reconstructed trajectories side by side
raw.rec <- ggpubr::ggarrange(PC1.dyn.350, rec.PC1.fPC1, common.legend = TRUE, legend = "bottom")
raw.rec
ggsave(raw.rec, filename = "figure/traj_side.jpg", width = 25, height = 10, dpi = 500)
# raw
## PC1
# Dynamic PC1 trajectories again (all segments; the /l/ + /Éą/ filter is left
# commented out), with larger title/strip text sizes. The no-op
# group_by(speaker, prompt, repetition) before ggplot() is removed: ggplot2
# ignores dplyr grouping, so it had no effect on the figure.
PC1.dyn.350.jp <- pca.result.350 %>%
  # filter(segment %in% c("/l/", "/Éą/")) %>%
  ggplot() +
  geom_path(aes(x = proportional_time, y = PC1z, colour = vowel, group = rec_date), alpha = 0.03) +
  # geom_smooth(colour = "black", linewidth = 3, se = FALSE, show.legend = TRUE) +
  geom_smooth(aes(x = proportional_time, y = PC1z, colour = vowel, group = vowel), linewidth = 2, se = FALSE, show.legend = TRUE) +
  stat_smooth(aes(x = proportional_time, y = PC1z, colour = vowel, group = vowel), method = "gam", geom = "ribbon", fill = NA, linewidth = 0.5, linetype = 3, show.legend = FALSE) +
  # facet_wrap(segment ~ vowel, ncol = 2) +
  facet_grid(segment ~ L1) +
  # ggtitle("PC1 (Onset: -350 ms)") +
  geom_hline(yintercept = 0, linetype = 1, linewidth = 0.1) +
  # dashed verticals: mean vowel onset and mean acoustic liquid onset
  geom_vline(aes(xintercept = mean(vowel_start_prop)), linetype = 2, linewidth = 0.5) +
  # geom_text(aes(x = mean(vowel_start_prop), y = 1.3), label = "vowel\nonset", colour = "Black", size = 5) +
  geom_vline(aes(xintercept = mean(acoustic_start_prop)), linetype = 2, linewidth = 0.5) +
  # geom_text(aes(x = mean(acoustic_start_prop), y = 1.3), label = "liquid\nonset", colour = "Black", size = 5) +
  scale_color_manual(values = c("blue4", "brown4", "darkolivegreen")) +
  theme_classic() +
  theme(legend.text = element_text(size = 30),
        legend.key.size = unit(1, 'cm'),
        legend.position = "bottom",
        legend.title = element_text(size = 30),
        axis.text = element_text(size = 15),
        axis.title = element_text(size = 20),
        plot.title = element_text(size = 30, hjust = 0.5, face = "bold"),
        strip.text.x = element_text(size = 30),
        strip.text.y = element_text(angle = 0, size = 30)
  ) +
  ylim(c(-4, 4)) +
  labs(x = "Proportional time (%)", y = "PC1 (z-score)", colour = "Adjacent vowel")
# FPC1 reconstruction
# Rebuild per-token PC1-score trajectories from the functional PCA:
# score * eigenfunction + mean curve, then plot the FPC1 reconstruction per
# token, faceted by segment x language.
# NOTE(review): PC2–PC5 reconstructions are computed but only PC1_reconstruct
# is plotted below; the trailing comma in mutate() is tolerated by dplyr's
# dynamic dots.
rec.PC1.fPC1.jp <- dat.PC1.time %>%
mutate(
PC1_reconstruct = PC1 * PC1_eigen + mean,
PC2_reconstruct = PC2 * PC2_eigen + mean,
PC3_reconstruct = PC3 * PC3_eigen + mean,
PC4_reconstruct = PC4 * PC4_eigen + mean,
PC5_reconstruct = PC5 * PC5_eigen + mean,
) %>%
# human-readable facet labels for L1
mutate(
language = case_when(
L1 == "English" ~ "L1 English",
L1 == "Japanese" ~ "L1 Japanese"
)
) %>%
# group_by(exclude_key) %>%
# filter(segment %in% c("/l/", "/Éą/")) %>%
ggplot() +
geom_path(aes(x = time, y = PC1_reconstruct, group = exclude_key, colour = vowel), alpha = 0.2, show.legend = TRUE) +
# geom_smooth(aes(x = time, y = PC1_reconstruct, group = vowel, colour = vowel)) +
# NOTE(review): palette ("blue3"/"brown2") differs from the "blue4"/"brown4"
# palette used in the raw-data plots — confirm this is intentional.
scale_color_manual(values = c("blue3", "brown2", "darkolivegreen")) +
labs(x = "Proportional Time (%)", y = "Reconstructed PC1 scores from FPC1") +
labs(title = "FPC1 for PC1") +
geom_hline(yintercept = 0, linetype = 1, linewidth = 0.1) +
# onset markers: means are taken from pca.result.350 (the raw tracking data),
# and mean() inside aes() is evaluated per facet panel
geom_vline(data = pca.result.350, aes(xintercept = mean(vowel_start_prop)), linetype = 2, linewidth = 0.5) +
# geom_text(data = pca.result.350.BAAP, aes(x = mean(vowel_start_prop)+9, y = 1.5), label = "vowel\nonset", colour = "Black", size = 10) +
geom_vline(data = pca.result.350, aes(xintercept = mean(acoustic_start_prop)), linetype = 2, linewidth = 0.5) +
# geom_text(data = pca.result.350.BAAP, aes(x = mean(acoustic_start_prop)-9, y = 1.5), label = "liquid\nonset", colour = "Black", size = 10) +
# draw the legend keys fully opaque even though the paths are alpha = 0.2
guides(colour = guide_legend(override.aes = list(alpha = 1))) +
facet_grid(segment ~ language) +
theme_classic() +
theme(legend.text = element_text(size = 30),
legend.key.size = unit(2, 'cm'),
legend.position = "bottom",
legend.title = element_text(size = 30),
axis.text = element_text(size = 15),
axis.title = element_text(size = 20),
# plot.title = element_text(size = 20, hjust = 0, face = "bold"),
plot.title = element_blank(),
strip.text.x = element_text(size = 30),
strip.text.y = element_text(angle = 0, size = 30)
)
# Arrange the raw and FPC1-reconstructed trajectory panels side by side with a
# shared bottom legend, display, and save.
raw.rec.jp <- ggpubr::ggarrange(PC1.dyn.350.jp, rec.PC1.fPC1.jp, common.legend = TRUE, legend = "bottom")
raw.rec.jp
ggsave(filename = "figure/traj_side_jp.jpg", plot = raw.rec.jp, width = 25, height = 10, dpi = 500)
# PC1: mean contour (solid) with the PC1 max (dashed, "+" markers) and
# PC1 min (dotted, "−" markers) perturbation traces from estimate.350.
PC1.350.plot.BAAP <- ggplot(estimate.350) +
  geom_path(aes(x = mean.x, y = mean.y), linewidth = 1.5) +
  geom_path(aes(x = PC1.max.x, y = PC1.max.y), linewidth = 1, alpha = 0.5, linetype = "dashed") +
  geom_path(aes(x = PC1.min.x, y = PC1.min.y), linewidth = 1, alpha = 0.5, linetype = "dotted") +
  geom_point(aes(x = PC1.max.x, y = PC1.max.y), shape = 3, size = 3, stroke = 2) +
  geom_point(aes(x = PC1.min.x, y = PC1.min.y), shape = "\u2212", size = 5, stroke = 8) +
  labs(x = "X", y = "Y", title = "PC1") +
  theme_classic() +
  theme(
    plot.title = element_text(size = 30, face = "bold"),
    legend.position = "top",
    legend.title = element_blank(),
    axis.text = element_text(size = 15),
    axis.title = element_text(size = 15),
    strip.text.x = element_text(size = 15)
  )
# PC2: mean contour (solid) with the PC2 max (dashed, "+" markers) and
# PC2 min (dotted, "−" markers) perturbation traces from estimate.350.
PC2.350.plot.BAAP <- ggplot(estimate.350) +
  geom_path(aes(x = mean.x, y = mean.y), linewidth = 1.5) +
  geom_path(aes(x = PC2.max.x, y = PC2.max.y), linewidth = 1, alpha = 0.5, linetype = "dashed") +
  geom_path(aes(x = PC2.min.x, y = PC2.min.y), linewidth = 1, alpha = 0.5, linetype = "dotted") +
  geom_point(aes(x = PC2.max.x, y = PC2.max.y), shape = 3, size = 3, stroke = 2) +
  geom_point(aes(x = PC2.min.x, y = PC2.min.y), shape = "\u2212", size = 5, stroke = 8) +
  labs(x = "X", y = "Y", title = "PC2") +
  theme_classic() +
  theme(
    plot.title = element_text(size = 30, face = "bold"),
    legend.position = "top",
    legend.title = element_blank(),
    axis.text = element_text(size = 15),
    axis.title = element_text(size = 15),
    strip.text.x = element_text(size = 15)
  )
# Stack the PC1 and PC2 contour plots vertically, display, and save with a
# transparent background.
pc.plot.2 <- ggpubr::ggarrange(PC1.350.plot.BAAP, PC2.350.plot.BAAP, ncol = 1)
pc.plot.2
ggsave(filename = "figure/PC1_PC2.jpg", plot = pc.plot.2, width = 8, height = 12, dpi = 1000, bg = "transparent")
## PC1
# Raw PC1z trajectories for the /l/ and /Éą/ tokens with per-vowel GAM smooths,
# faceted by segment x L1; displayed and saved to figure/dynamic_PC1.jpg.
# NOTE(review): the group_by() below has no effect on the ggplot layers —
# presumably a leftover; confirm before removing.
PC1.dyn.350 <- pca.result.350 %>%
filter(segment %in% c("/l/", "/Éą/")) %>%
group_by(speaker, prompt, repetition) %>%
ggplot() +
# one faint line per token (grouped by recording date)
geom_path(aes(x = proportional_time, y = PC1z, colour = vowel, group = rec_date), alpha = 0.03) +
# geom_smooth(colour = "black", linewidth = 3, se = FALSE, show.legend = TRUE) +
geom_smooth(aes(x = proportional_time, y = PC1z, colour = vowel, group = vowel), linewidth = 2, se = FALSE, show.legend = TRUE) +
# dotted ribbon outline traces the GAM confidence band without filling it
stat_smooth(aes(x = proportional_time, y = PC1z, colour = vowel, group = vowel), method = "gam", geom = "ribbon", fill = NA, linewidth = 0.5, linetype = 3, show.legend = FALSE) +
# facet_wrap(segment ~ vowel, ncol = 2) +
facet_grid(segment ~ L1) +
# ggtitle("PC1 (Onset: -350 ms)") +
geom_hline(yintercept = 0, linetype = 1, linewidth = 0.1) +
# NOTE(review): mean() inside aes() is evaluated per facet panel — confirm
# a global mean onset was not intended.
geom_vline(aes(xintercept = mean(vowel_start_prop)), linetype = 2, linewidth = 0.5) +
# geom_text(aes(x = mean(vowel_start_prop), y = 1.3), label = "vowel\nonset", colour = "Black", size = 5) +
geom_vline(aes(xintercept = mean(acoustic_start_prop)), linetype = 2, linewidth = 0.5) +
# geom_text(aes(x = mean(acoustic_start_prop), y = 1.3), label = "liquid\nonset", colour = "Black", size = 5) +
scale_color_manual(values = c("blue4", "brown4", "darkolivegreen")) +
theme_classic() +
theme(legend.text = element_text(size = 30),
legend.key.size = unit(1, 'cm'),
legend.position = "bottom",
legend.title = element_text(size = 30),
axis.text = element_text(size = 15),
axis.title = element_text(size = 20),
plot.title = element_text(size = 30, hjust = 0.5, face = "bold"),
strip.text.x = element_text(size = 30),
strip.text.y = element_text(angle = 0, size = 30)
) +
# ylim() drops (not merely clips) observations outside [-4, 4]
ylim(c(-4, 4)) +
labs(x = "Proportional time (%)", y = "PC1 (z-score)", colour = "Adjacent vowel")
PC1.dyn.350
ggsave(PC1.dyn.350, filename = "figure/dynamic_PC1.jpg", width = 20, height = 10, dpi = 500)
# fPC1 score distributions by adjacent vowel: violins with white boxplot
# overlays, faceted by segment x L1.
PC1.fPC1.violinplot.BAAP <- dat.PC1 %>%
  ggplot(aes(x = vowel, y = PC1, fill = vowel)) +
  geom_violin(show.legend = FALSE, alpha = 0.5) +
  geom_boxplot(show.legend = FALSE, alpha = 0.7, width = 0.3, fill = "white") +
  geom_hline(yintercept = 0, linetype = 2) +
  scale_fill_manual(values = c("blue4", "brown4", "darkolivegreen")) +
  facet_grid(segment ~ L1) +
  theme_classic() +
  theme(
    legend.text = element_text(size = 30),
    legend.key.size = unit(1, 'cm'),
    legend.position = "bottom",
    legend.title = element_text(size = 30),
    axis.text = element_text(size = 15),
    axis.title = element_text(size = 20),
    plot.title = element_text(size = 20, hjust = 0.5, face = "bold"),
    strip.text.x = element_text(size = 20),
    strip.text.y = element_text(angle = 0, size = 20)
  ) +
  labs(x = "Adjacent vowels", y = "fPC1")
PC1.fPC1.violinplot.BAAP
## raw data
# Unfaceted version of the raw PC1z trajectories (/l/ and /Éą/ pooled), used as
# the top panel of the FPCA illustration figure. A wide white smooth is drawn
# first so the coloured smooth drawn on top gets a white halo.
# NOTE(review): the group_by() below has no effect on the ggplot layers —
# presumably a leftover; confirm before removing.
dyn.all <- pca.result.350 %>%
filter(segment %in% c("/l/", "/Éą/")) %>%
group_by(speaker, prompt, repetition) %>%
ggplot() +
geom_path(aes(x = proportional_time, y = PC1z, colour = vowel, group = rec_date), alpha = 0.03, show.legend = FALSE) +
# geom_smooth(colour = "black", linewidth = 3, se = FALSE, show.legend = TRUE) +
# white underlay smooth (halo), then the coloured smooth on top
geom_smooth(aes(x = proportional_time, y = PC1z, colour = vowel, group = vowel), colour = "white", alpha = 0.5, linewidth = 3, se = FALSE, show.legend = FALSE) +
geom_smooth(aes(x = proportional_time, y = PC1z, colour = vowel, group = vowel), alpha = 0.5, linewidth = 2, se = FALSE, show.legend = FALSE) +
stat_smooth(aes(x = proportional_time, y = PC1z, colour = vowel, group = vowel), method = "gam", geom = "ribbon", fill = NA, linewidth = 0.5, linetype = 3, show.legend = FALSE) +
# facet_wrap(segment ~ vowel, ncol = 2) +
# facet_grid(segment ~ L1) +
# ggtitle("PC1 (Onset: -350 ms)") +
geom_hline(yintercept = 0, linetype = 1, linewidth = 0.1) +
geom_vline(aes(xintercept = mean(vowel_start_prop)), linetype = 2, linewidth = 0.5) +
# geom_text(aes(x = mean(vowel_start_prop), y = 1.3), label = "vowel\nonset", colour = "Black", size = 5) +
geom_vline(aes(xintercept = mean(acoustic_start_prop)), linetype = 2, linewidth = 0.5) +
# geom_text(aes(x = mean(acoustic_start_prop), y = 1.3), label = "liquid\nonset", colour = "Black", size = 5) +
scale_color_manual(values = c("blue4", "brown4", "darkolivegreen")) +
theme_classic() +
theme(legend.text = element_text(size = 30),
legend.key.size = unit(1, 'cm'),
legend.position = "bottom",
legend.title = element_text(size = 30),
axis.text = element_text(size = 15),
# axis.title = element_text(size = 20),
# axis titles suppressed: the combined illustration supplies its own labels
axis.title = element_blank(),
plot.title = element_text(size = 30, hjust = 0.5, face = "bold"),
strip.text.x = element_text(size = 30),
strip.text.y = element_text(angle = 0, size = 30)
) +
ylim(c(-4, 4)) +
labs(x = "Proportional time (%)", y = "PC1", colour = "Adjacent vowel")
## FPC1 perturbation
# Mean PC1 curve over proportional time with FPC1 perturbation markers:
# Qplus drawn as red "+" points and Qminus as blue "−" points around the mean.
pc1_perturbation_BAAP <- p_PC1 %>%
filter(PC == "PC1") %>%
# relabel for the (currently commented-out) fPC facet strip
mutate(
fPC = case_when(
PC == "PC1" ~ "fPC1"
)
) %>%
ggplot2::ggplot() +
aes(x = time, y = mean) +
geom_path() +
geom_point(aes(y = Qplus), shape = 3, size = 3, colour = "red") +
geom_point(aes(y = Qminus), shape = 95, size = 5, colour = "blue") +
xlab("Proportional Time (%)") +
ylab("PC") +
# onset markers from the raw tracking data; mean() inside aes() is evaluated
# on pca.result.350
geom_vline(data = pca.result.350, aes(xintercept = mean(acoustic_start_prop)), linetype = 2) +
geom_vline(data = pca.result.350, aes(xintercept = mean(vowel_start_prop)), linetype = 2) +
# facet_wrap(~ fPC, ncol = 2) +
scale_color_manual(values = c("blue4", "brown4", "darkolivegreen")) +
theme_classic() +
theme(legend.text = element_text(size = 30),
legend.key.size = unit(1, 'cm'),
legend.position = "bottom",
legend.title = element_text(size = 30),
axis.text = element_text(size = 15),
axis.title = element_text(size = 20),
# y-axis title suppressed for the combined illustration figure
axis.title.y = element_blank(),
plot.title = element_text(size = 20, hjust = 0.5, face = "bold"),
strip.text.x = element_text(size = 20),
strip.text.y = element_text(angle = 0, size = 20)
) +
labs(x = "Proportional Time(%)", y = "PC1 captured by FPC1")
# Stack the raw-trajectory panel above the FPC1 perturbation panel, display,
# and save the FPCA illustration figure.
illustration <- ggpubr::ggarrange(dyn.all, pc1_perturbation_BAAP, ncol = 1)
illustration
ggsave(filename = "figure/FPCA_illustration.jpg", plot = illustration, width = 5, height = 5, dpi = 1000)
# subset English /l/ data: keep the /l/ tokens, give the functional PCs
# explicit fPC names, recode the vowel labels, and set factor baselines
# explicitly (/i/ as the reference vowel, English as the reference L1).
dat.PC1.EN.L <- dat.PC1 %>%
  filter(segment == "/l/") %>%
  rename(fPC1 = PC1, fPC2 = PC2, fPC3 = PC3, fPC4 = PC4, fPC5 = PC5) %>%
  mutate(
    vowel = case_when(
      vowel == "/a/" ~ "A",
      vowel == "/i/" ~ "I",
      vowel == "/u/" ~ "U"
    ),
    # baseline levels: vowel = I, L1 = English
    vowel = factor(vowel, levels = c("I", "A", "U")),
    L1 = factor(L1, levels = c("English", "Japanese")),
    # convert grouping variables to factors with unused levels dropped
    speaker = droplevels(as.factor(speaker)),
    prompt = droplevels(as.factor(prompt))
  )
levels(dat.PC1.EN.L$prompt)
## [1] "lamb" "lamp" "lap" "leaf" "leap" "leave" "loom" "lube"
# specify prior
# Weakly-informative priors for the brms models.
b1_prior <- c(
  # population-level intercept
  brms::set_prior("normal(0, 20)", class = "Intercept"),
  # population-level slopes
  brms::set_prior("normal(0, 100)", class = "b"),
  # group-level (random-effect) standard deviations
  brms::set_prior("normal(0, 10)", class = "sd"),
  # residual standard deviation
  brms::set_prior("normal(0, 10)", class = "sigma"),
  # random-effect correlation matrices
  brms::set_prior("lkj(2)", class = "cor")
)
# summary
# m1 (full model for /l/): fPC1 ~ vowel * L1 with by-speaker vowel slopes and
# by-prompt L1 slopes. Captured output below; Rhat = 1.00 throughout.
summary(PC1.FPC1.L.m1.BAAP)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: fPC1 ~ vowel + L1 + L1:vowel + (1 + vowel | speaker) + (1 + L1 | prompt)
## Data: dat.PC1.EN.L (Number of observations: 1309)
## Draws: 4 chains, each with iter = 12000; warmup = 2000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~prompt (Number of levels: 8)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept) 2.34 0.92 1.26 4.69 1.00 11954
## sd(L1Japanese) 2.18 0.92 1.15 4.50 1.00 12011
## cor(Intercept,L1Japanese) -0.72 0.27 -0.98 0.01 1.00 14510
## Tail_ESS
## sd(Intercept) 14206
## sd(L1Japanese) 13232
## cor(Intercept,L1Japanese) 20002
##
## ~speaker (Number of levels: 43)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept) 2.81 0.34 2.23 3.57 1.00 11447
## sd(vowelA) 3.96 0.48 3.14 5.03 1.00 9436
## sd(vowelU) 2.74 0.38 2.09 3.56 1.00 12869
## cor(Intercept,vowelA) -0.38 0.14 -0.62 -0.09 1.00 8383
## cor(Intercept,vowelU) -0.21 0.16 -0.50 0.11 1.00 12803
## cor(vowelA,vowelU) 0.37 0.14 0.06 0.63 1.00 13500
## Tail_ESS
## sd(Intercept) 19876
## sd(vowelA) 17289
## sd(vowelU) 22618
## cor(Intercept,vowelA) 15819
## cor(Intercept,vowelU) 19553
## cor(vowelA,vowelU) 20638
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 3.59 1.64 0.35 6.80 1.00 11404 16164
## vowelA -6.45 2.28 -11.01 -1.91 1.00 11409 16903
## vowelU -3.01 2.44 -7.85 1.83 1.00 13893 16851
## L1Japanese 3.68 1.65 0.48 6.94 1.00 10210 14967
## vowelA:L1Japanese -4.61 2.34 -9.19 0.03 1.00 10300 15646
## vowelU:L1Japanese -2.59 2.37 -7.27 2.05 1.00 12543 16216
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 2.49 0.05 2.39 2.59 1.00 45073 30272
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# summary
# m2 (/l/, no interaction): fPC1 ~ vowel + L1, same random-effect structure as
# m1. Printing the fitted model object shows its summary (captured below).
PC1.FPC1.L.m2.BAAP
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: fPC1 ~ vowel + L1 + (1 + vowel | speaker) + (1 + L1 | prompt)
## Data: dat.PC1.EN.L (Number of observations: 1309)
## Draws: 4 chains, each with iter = 12000; warmup = 2000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~prompt (Number of levels: 8)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept) 2.54 0.98 1.34 4.99 1.00 9955
## sd(L1Japanese) 2.45 0.88 1.29 4.66 1.00 8655
## cor(Intercept,L1Japanese) -0.72 0.28 -0.98 0.06 1.00 7932
## Tail_ESS
## sd(Intercept) 13776
## sd(L1Japanese) 17405
## cor(Intercept,L1Japanese) 12010
##
## ~speaker (Number of levels: 43)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept) 2.82 0.35 2.23 3.59 1.00 9570
## sd(vowelA) 4.01 0.50 3.16 5.11 1.00 6847
## sd(vowelU) 2.75 0.38 2.10 3.57 1.00 11585
## cor(Intercept,vowelA) -0.38 0.14 -0.63 -0.09 1.00 7201
## cor(Intercept,vowelU) -0.21 0.16 -0.50 0.11 1.00 11265
## cor(vowelA,vowelU) 0.38 0.15 0.07 0.64 1.00 11803
## Tail_ESS
## sd(Intercept) 16942
## sd(vowelA) 14884
## sd(vowelU) 20773
## cor(Intercept,vowelA) 13754
## cor(Intercept,vowelU) 18475
## cor(vowelA,vowelU) 18025
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 5.09 1.58 2.00 8.23 1.00 8379 11767
## vowelA -9.76 2.00 -13.30 -5.32 1.00 8109 8293
## vowelU -4.91 1.85 -8.38 -0.88 1.00 10129 9872
## L1Japanese 1.59 1.29 -0.97 4.11 1.00 9311 15721
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 2.49 0.05 2.39 2.59 1.00 45147 31610
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# m3 (/l/, vowel dropped): fPC1 ~ L1 only, with intercept-only by-speaker
# random effects. Printing the fitted model shows its summary (captured below).
PC1.FPC1.L.m3.BAAP
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: fPC1 ~ L1 + (1 | speaker) + (1 + L1 | prompt)
## Data: dat.PC1.EN.L (Number of observations: 1309)
## Draws: 4 chains, each with iter = 12000; warmup = 2000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~prompt (Number of levels: 8)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept) 4.43 1.51 2.47 8.24 1.00 10295
## sd(L1Japanese) 3.28 1.18 1.79 6.24 1.00 10834
## cor(Intercept,L1Japanese) 0.21 0.31 -0.43 0.73 1.00 14075
## Tail_ESS
## sd(Intercept) 18479
## sd(L1Japanese) 18079
## cor(Intercept,L1Japanese) 20740
##
## ~speaker (Number of levels: 43)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 2.56 0.31 2.03 3.24 1.00 5916 12508
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 0.41 1.78 -3.09 4.00 1.00 6039 11118
## L1Japanese 1.33 1.51 -1.67 4.30 1.00 6844 12942
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 3.19 0.06 3.07 3.32 1.00 42152 28696
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# m4 (/l/, L1 dropped): fPC1 ~ vowel only, with intercept-only by-prompt
# random effects. Printing the fitted model shows its summary (captured below).
PC1.FPC1.L.m4.BAAP
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: fPC1 ~ vowel + (1 + vowel | speaker) + (1 | prompt)
## Data: dat.PC1.EN.L (Number of observations: 1309)
## Draws: 4 chains, each with iter = 12000; warmup = 2000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~prompt (Number of levels: 8)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.36 0.68 0.63 3.11 1.00 9737 12529
##
## ~speaker (Number of levels: 43)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept) 3.26 0.38 2.61 4.10 1.00 6603
## sd(vowelA) 4.41 0.51 3.54 5.52 1.00 5127
## sd(vowelU) 2.90 0.38 2.23 3.74 1.00 9607
## cor(Intercept,vowelA) -0.52 0.12 -0.72 -0.27 1.00 5416
## cor(Intercept,vowelU) -0.36 0.14 -0.61 -0.06 1.00 10120
## cor(vowelA,vowelU) 0.49 0.13 0.21 0.70 1.00 9238
## Tail_ESS
## sd(Intercept) 12977
## sd(vowelA) 9231
## sd(vowelU) 16841
## cor(Intercept,vowelA) 10629
## cor(Intercept,vowelU) 17863
## cor(vowelA,vowelU) 17178
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 6.04 1.03 4.06 8.07 1.00 6860 11258
## vowelA -9.51 1.44 -12.33 -6.73 1.00 6959 11431
## vowelU -4.69 1.47 -7.60 -1.77 1.00 11067 13625
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 2.62 0.05 2.51 2.72 1.00 40182 30718
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Model comparison using Bayes Factor: Interaction
# Compare m2 (no vowel:L1 interaction) against m1 (with interaction); the
# captured output shows BF = 238.58 in favour of m2.
comparison.PC1.FPC1.L.BAAP.m1.m2 <- bayestestR::bayesfactor_models(PC1.FPC1.L.m1.BAAP, PC1.FPC1.L.m2.BAAP, denominator = PC1.FPC1.L.m1.BAAP) # denominator: the model against which comparison is performed
comparison.PC1.FPC1.L.BAAP.m1.m2
## Bayes Factors for Model Comparison
##
## Model BF
## [2] vowel + L1 + (1 + vowel | speaker) + (1 + L1 | prompt) 238.58
##
## * Against Denominator: [1] vowel + L1 + L1:vowel + (1 + vowel | speaker) + (1 + L1 | prompt)
## * Bayes Factor Type: marginal likelihoods (bridgesampling)
# plot
plot(comparison.PC1.FPC1.L.BAAP.m1.m2, n_pies = "one", value = "BF")
# Model comparison using Bayes Factor: Fixed effects
# Compare the reduced models m3 (no vowel) and m4 (no L1) against m2; the
# captured output shows BFs far below 1, i.e. strong support for keeping both
# vowel and L1 in the model.
comparison.fixed.effect.PC1.FPC1.L.BAAP <- bayestestR::bayesfactor_models(PC1.FPC1.L.m2.BAAP, PC1.FPC1.L.m3.BAAP, PC1.FPC1.L.m4.BAAP, denominator = PC1.FPC1.L.m2.BAAP)
comparison.fixed.effect.PC1.FPC1.L.BAAP
## Bayes Factors for Model Comparison
##
## Model BF
## [2] L1 + (1 | speaker) + (1 + L1 | prompt) 9.02e-92
## [3] vowel + (1 + vowel | speaker) + (1 | prompt) 2.57e-21
##
## * Against Denominator: [1] vowel + L1 + (1 + vowel | speaker) + (1 + L1 | prompt)
## * Bayes Factor Type: marginal likelihoods (bridgesampling)
# plot
plot(comparison.fixed.effect.PC1.FPC1.L.BAAP, n_pies = "one", value = "BF")
# save models for future analysis: each model is written to model/<name>.rda
# under its own object name (save(list = ...) is equivalent to passing the
# symbol), then m1 is reloaded for the downstream contrast analysis.
for (model_name in c("PC1.FPC1.L.m1.BAAP", "PC1.FPC1.L.m2.BAAP",
                     "PC1.FPC1.L.m3.BAAP", "PC1.FPC1.L.m4.BAAP")) {
  save(list = model_name, file = paste0("model/", model_name, ".rda"))
}
load(file = "model/PC1.FPC1.L.m1.BAAP.rda")
# obtain posterior draws (sampled values)
# Draw-level posterior expected values for each L1 x vowel cell of m1.
post_data_L_PC1_FPC1_BAAP <- PC1.FPC1.L.m1.BAAP %>%
emmeans::emmeans(~ L1*vowel, epred = TRUE) %>%
tidybayes::gather_emmeans_draws()
# obtain highest density interval
# Median and HDI per cell, then relabel vowels (A/I/U -> /a/,/i/,/u/) and L1
# (-> "L1 English"/"L1 Japanese") for plotting.
post_data_L_PC1_FPC1_BAAP_hdi <- post_data_L_PC1_FPC1_BAAP %>%
tidybayes::median_hdi() %>%
mutate(
vowel = case_when(
vowel == "A" ~ "/a/",
vowel == "I" ~ "/i/",
vowel == "U" ~ "/u/"
),
L1 = case_when(
L1 == "English" ~ "L1 English",
L1 == "Japanese" ~ "L1 Japanese"
))
# renaming vowel labels
# Apply the same relabelling to the full draw-level data so both layers of the
# plot share identical category labels.
post_data_L_PC1_FPC1_BAAP <- post_data_L_PC1_FPC1_BAAP %>%
mutate(
vowel = case_when(
vowel == "A" ~ "/a/",
vowel == "I" ~ "/i/",
vowel == "U" ~ "/u/"
),
L1 = case_when(
L1 == "English" ~ "L1 English",
L1 == "Japanese" ~ "L1 Japanese"
))
# plotting posterior distribution
# Posterior medians and HDIs (point + errorbar) over the full posterior cloud
# (quasirandom jitter), by vowel, faceted by L1.
# Fix: geom_errorbar()/geom_hline() used the deprecated `size` aesthetic for
# line width; replaced with `linewidth` to match the rest of the file
# (ggplot2 >= 3.4). Rendering is unchanged; the deprecation warning goes away.
post_data_L_PC1_FPC1_BAAP_plot <- ggplot() +
geom_point(data = post_data_L_PC1_FPC1_BAAP_hdi, aes(x = vowel, y = .value, colour = vowel, shape = vowel), size = 6, stroke = 2, show.legend = FALSE) +
geom_errorbar(data = post_data_L_PC1_FPC1_BAAP_hdi, aes(x = vowel, y = .value, ymin = .lower, ymax = .upper), linewidth = 2, width = 0.8, show.legend = FALSE) +
# faint jittered cloud of individual posterior draws behind the summaries
ggbeeswarm::geom_quasirandom(data = post_data_L_PC1_FPC1_BAAP, aes(x = vowel, y = .value, colour = vowel), alpha = 0.008, size = 0.01, width = 0.3, dodge.width = 0.3, show.legend = FALSE) +
geom_hline(yintercept = 0, linewidth = 0.5, linetype = 2) +
facet_wrap(~ factor(L1, levels = c("L1 English", "L1 Japanese"))) +
scale_colour_manual(values = c("blue4", "brown4", "darkolivegreen")) +
scale_y_continuous(limits = c(-20, 20)) +
theme_classic() +
theme(legend.text = element_text(size = 30),
legend.key.size = unit(2, 'cm'),
legend.position = "bottom",
legend.title = element_text(size = 30),
axis.text.x = element_text(size = 30),
# axis.title.x = element_text(size = 20),
axis.title.x = element_blank(),
axis.title.y = element_text(size = 20),
plot.title = element_text(size = 30, hjust = 0, face = "bold"),
strip.text.x = element_text(size = 30),
strip.text.y = element_text(angle = 0, size = 30)
) +
labs(x = "Vowel", y = "FPC1") +
labs(title = "/l/: FPC1 for PC1")
post_data_L_PC1_FPC1_BAAP_plot
# quantifying contrasts
# Fix: removed a stray bare `emmeans` token that preceded this comment — it
# auto-printed the emmeans function body at the console and did nothing else.
## get the adjusted means: estimated marginal means of fPC1 for each vowel
## within each L1 group (captured output below)
PC1.FPC1.L.m1.BAAP.em <- emmeans::emmeans(PC1.FPC1.L.m1.BAAP, ~ vowel|L1)
PC1.FPC1.L.m1.BAAP.em
## L1 = English:
## vowel emmean lower.HPD upper.HPD
## I 3.592 0.279 6.727
## A -2.856 -6.347 0.667
## U 0.581 -3.454 4.532
##
## L1 = Japanese:
## vowel emmean lower.HPD upper.HPD
## I 7.260 4.705 9.799
## A -3.795 -6.499 -1.119
## U 1.674 -1.428 4.969
##
## Point estimate displayed: median
## HPD interval probability: 0.95
## get all possible contrasts
# Pairwise (Tukey-style) vowel contrasts within each L1 group, from the
# adjusted means above (captured output below).
PC1.FPC1.L.m1.BAAP.cont <- emmeans::contrast(PC1.FPC1.L.m1.BAAP.em, "tukey")
PC1.FPC1.L.m1.BAAP.cont
## L1 = English:
## contrast estimate lower.HPD upper.HPD
## I - A 6.44 1.96 11.05
## I - U 3.00 -1.86 7.80
## A - U -3.44 -8.49 1.48
##
## L1 = Japanese:
## contrast estimate lower.HPD upper.HPD
## I - A 11.04 7.51 14.74
## I - U 5.59 1.58 9.45
## A - U -5.47 -9.65 -1.57
##
## Point estimate displayed: median
## HPD interval probability: 0.95
## get the posterior draws from the contrasts
# Draw-level posterior samples for each contrast, with L1 relabelled for
# plotting ("L1 English"/"L1 Japanese").
PC1_FPC1_L_cont_BAAP_posterior <- tidybayes::gather_emmeans_draws(PC1.FPC1.L.m1.BAAP.cont) %>%
mutate(
L1 = case_when(
L1 == "English" ~ "L1 English",
L1 == "Japanese" ~ "L1 Japanese"
)
)
# calculating probability of direction (PD): the posterior probability that a
# contrast lies in its expected direction.
# Fix: the six near-identical length(which(...))/length(...) expressions are
# replaced with one documented helper; the arithmetic is unchanged
# (sum(cond, na.rm = TRUE) / length(x) == length(which(cond)) / length(x)).
## helper: proportion of posterior draws for one L1/contrast cell that fall in
## the expected direction ("pos" = > 0, "neg" = < 0)
pd_for <- function(draws, l1, cont, direction) {
  x <- draws$.value[draws$L1 == l1 & draws$contrast == cont]
  if (direction == "pos") {
    sum(x > 0, na.rm = TRUE) / length(x)
  } else {
    sum(x < 0, na.rm = TRUE) / length(x)
  }
}
## L1 English (expected: I > A, I > U, A < U)
EngIA_L_PC1_FPC1_BAAP_pd <- pd_for(PC1_FPC1_L_cont_BAAP_posterior, "L1 English", "I - A", "pos")
EngIU_L_PC1_FPC1_BAAP_pd <- pd_for(PC1_FPC1_L_cont_BAAP_posterior, "L1 English", "I - U", "pos")
EngAU_L_PC1_FPC1_BAAP_pd <- pd_for(PC1_FPC1_L_cont_BAAP_posterior, "L1 English", "A - U", "neg")
## L1 Japanese (expected: I > A, I > U, A < U)
JapIA_L_PC1_FPC1_BAAP_pd <- pd_for(PC1_FPC1_L_cont_BAAP_posterior, "L1 Japanese", "I - A", "pos")
JapIU_L_PC1_FPC1_BAAP_pd <- pd_for(PC1_FPC1_L_cont_BAAP_posterior, "L1 Japanese", "I - U", "pos")
JapAU_L_PC1_FPC1_BAAP_pd <- pd_for(PC1_FPC1_L_cont_BAAP_posterior, "L1 Japanese", "A - U", "neg")
### contrast probability of direction merged together (order matches the row
### order of the contrast summary merged below: Eng I-A, I-U, A-U, then Jap)
L_PC1_FPC1_contrast_BAAP <- c(EngIA_L_PC1_FPC1_BAAP_pd, EngIU_L_PC1_FPC1_BAAP_pd, EngAU_L_PC1_FPC1_BAAP_pd, JapIA_L_PC1_FPC1_BAAP_pd, JapIU_L_PC1_FPC1_BAAP_pd, JapAU_L_PC1_FPC1_BAAP_pd)
### merge contrast pd with hdi/median
# NOTE(review): data.frame() on the emmGrid appears to coerce it to its
# summary frame (contrast/L1/estimate/HPD columns) before appending the PD
# column — confirm against the emmeans coercion behaviour in use.
L_PC1_FPC1_contrast_BAAP <- data.frame(PC1.FPC1.L.m1.BAAP.cont, L_PC1_FPC1_contrast_BAAP) %>% rename(
PD = L_PC1_FPC1_contrast_BAAP
)
### results
# Round numeric columns, order contrasts for display, and sort (output below).
L_PC1_FPC1_contrast_BAAP %>%
mutate(across(where(is.numeric), ~ round(., digits = 2))) %>%
mutate(
contrast = factor(contrast, levels = c("A - U", "I - U", "I - A"))
) %>%
arrange(L1, contrast)
## contrast L1 estimate lower.HPD upper.HPD PD
## 1 A - U English -3.44 -8.49 1.48 0.92
## 2 I - U English 3.00 -1.86 7.80 0.91
## 3 I - A English 6.44 1.96 11.05 0.99
## 4 A - U Japanese -5.47 -9.65 -1.57 0.99
## 5 I - U Japanese 5.59 1.58 9.45 0.99
## 6 I - A Japanese 11.04 7.51 14.74 1.00
## plot
# Posterior densities of the vowel contrasts (slabs shaded by HDI level, with
# median + interval markers underneath), faceted by L1.
PC1_FPC1_L_cont_posterior_BAAP_plot <- PC1_FPC1_L_cont_BAAP_posterior %>%
ggplot(aes(y = contrast, x = .value)) +
# tidybayes::stat_halfeye(point_interval = "median_hdi", aes(fill = after_stat(level)), .width = c(.66, .95, .99)) +
# NOTE(review): slab_alpha = 0.4 is a constant mapped inside aes(); the
# guides() call below overrides legend alpha accordingly — confirm this is
# the intended way to set slab transparency here.
tidybayes::stat_slab(aes(fill = after_stat(level), slab_alpha = 0.4), point_interval = "median_hdi", .width = c(.89, .95, .99)) +
tidybayes::stat_pointinterval(point_interval = "median_hdi", .width = c(.89, .95, .99)) +
# scale_fill_brewer(na.translate = FALSE) +
scale_fill_manual(values = c("darkolivegreen", "blue4", "brown4"), na.translate = FALSE) +
scale_x_continuous(limits = c(-20, 20)) +
facet_wrap(~ L1) +
# zero line: contrasts crossing it are directionally uncertain
geom_vline(xintercept = 0, lty = 2) +
theme_classic() +
guides(fill = guide_legend(override.aes = list(alpha = 0.4), title = "HDI")) +
labs(x = "difference", title = "/l/: PC1/FPC1 contrast") +
theme(legend.text = element_text(size = 30),
# legend.key.size = unit(2, 'cm'),
legend.position = "bottom",
legend.title = element_text(size = 30),
axis.text = element_text(size = 15),
axis.text.y = element_text(size = 25),
# axis.title = element_text(size = 20),
axis.title = element_blank(),
plot.title = element_text(size = 30, hjust = 0, face = "bold"),
strip.text.x = element_text(size = 30),
strip.text.y = element_text(angle = 0, size = 30))
PC1_FPC1_L_cont_posterior_BAAP_plot
# Stack the posterior-means panel above the contrast panel for /l/, display,
# and save the combined figure.
L.BAAP <- ggarrange(post_data_L_PC1_FPC1_BAAP_plot, PC1_FPC1_L_cont_posterior_BAAP_plot, common.legend = FALSE, legend = "bottom", ncol = 1)
L.BAAP
ggsave(filename = "figure/PC1_FPC1_L.png", plot = L.BAAP, width = 8, height = 10, dpi = 1000)
# subset English /Éą/ data: keep the /Éą/ tokens, give the functional PCs
# explicit fPC names, recode the vowel labels, and set factor baselines
# explicitly (/i/ as the reference vowel, English as the reference L1).
dat.PC1.EN.R <- dat.PC1 %>%
  filter(segment == "/Éą/") %>%
  rename(fPC1 = PC1, fPC2 = PC2, fPC3 = PC3, fPC4 = PC4, fPC5 = PC5) %>%
  mutate(
    vowel = case_when(
      vowel == "/a/" ~ "A",
      vowel == "/i/" ~ "I",
      vowel == "/u/" ~ "U"
    ),
    # baseline levels: vowel = I, L1 = English
    vowel = factor(vowel, levels = c("I", "A", "U")),
    L1 = factor(L1, levels = c("English", "Japanese")),
    # convert grouping variables to factors with unused levels dropped
    speaker = droplevels(as.factor(speaker)),
    prompt = droplevels(as.factor(prompt))
  )
levels(dat.PC1.EN.R$speaker)
## [1] "2d57ke" "2drb3c" "2zy9tf" "3bcpyh" "3hsubn" "3pzrts" "3wy8us" "4ps8zx"
## [9] "54i2ks" "5jzj2h" "5upwe3" "6p63jy" "7cd4t4" "bfwizh" "birw55" "bj8mjm"
## [17] "byxcff" "c5y8z6" "cdsju7" "cu2jce" "dbtzn2" "ds6umh" "fgd95u" "fkcwjr"
## [25] "h5s4x3" "heat7g" "hgrist" "i3wa7f" "j586ts" "jcy8xi" "kjn9n4" "m46dhf"
## [33] "m5r28t" "s6a8gh" "srky8g" "tay55n" "uig6n9" "ut4e5m" "we8z58" "xub9bc"
## [41] "z3n578" "zajk25" "zz3r2g"
levels(dat.PC1.EN.R$prompt)
## [1] "ram" "ramp" "rap" "reap" "reef" "reeve" "room" "rube"
# summary
# /Éą/ m1 (full model): fPC1 ~ vowel * L1 with by-speaker vowel slopes and
# by-prompt L1 slopes. Captured output below; Rhat = 1.00 throughout.
summary(PC1.FPC1.R.m1.BAAP)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: fPC1 ~ vowel + L1 + L1:vowel + (1 + vowel | speaker) + (1 + L1 | prompt)
## Data: dat.PC1.EN.R (Number of observations: 1321)
## Draws: 4 chains, each with iter = 12000; warmup = 2000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~prompt (Number of levels: 8)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept) 2.06 0.92 1.01 4.41 1.00 12759
## sd(L1Japanese) 1.43 0.71 0.58 3.21 1.00 14188
## cor(Intercept,L1Japanese) -0.51 0.32 -0.94 0.27 1.00 24991
## Tail_ESS
## sd(Intercept) 15486
## sd(L1Japanese) 18151
## cor(Intercept,L1Japanese) 25564
##
## ~speaker (Number of levels: 43)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept) 3.36 0.40 2.67 4.25 1.00 11666
## sd(vowelA) 2.31 0.35 1.69 3.08 1.00 15536
## sd(vowelU) 2.98 0.44 2.22 3.93 1.00 13599
## cor(Intercept,vowelA) -0.41 0.15 -0.66 -0.10 1.00 18748
## cor(Intercept,vowelU) -0.34 0.15 -0.61 -0.03 1.00 16962
## cor(vowelA,vowelU) 0.28 0.17 -0.09 0.59 1.00 10883
## Tail_ESS
## sd(Intercept) 19801
## sd(vowelA) 24906
## sd(vowelU) 23525
## cor(Intercept,vowelA) 26098
## cor(Intercept,vowelU) 23882
## cor(vowelA,vowelU) 21106
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 1.23 1.59 -1.94 4.36 1.00 11058 17109
## vowelA -3.23 1.99 -7.13 0.69 1.00 13949 17304
## vowelU -3.64 2.20 -8.00 0.78 1.00 14560 18292
## L1Japanese 2.07 1.46 -0.84 4.95 1.00 9206 16575
## vowelA:L1Japanese -8.86 1.55 -11.92 -5.78 1.00 14479 19235
## vowelU:L1Japanese -3.54 1.79 -7.09 0.04 1.00 13903 19244
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 3.23 0.07 3.10 3.36 1.00 54803 29616
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Summary of the reduced model (m2): same as m1 but without the vowel x L1
# interaction. Printing the brmsfit object shows the same summary layout.
PC1.FPC1.R.m2.BAAP
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: fPC1 ~ vowel + L1 + (1 + vowel | speaker) + (1 + L1 | prompt)
## Data: dat.PC1.EN.R (Number of observations: 1321)
## Draws: 4 chains, each with iter = 12000; warmup = 2000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~prompt (Number of levels: 8)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept) 2.81 1.40 1.21 6.44 1.00 11832
## sd(L1Japanese) 4.70 1.58 2.60 8.64 1.00 13471
## cor(Intercept,L1Japanese) -0.35 0.46 -0.96 0.63 1.00 5298
## Tail_ESS
## sd(Intercept) 14587
## sd(L1Japanese) 21658
## cor(Intercept,L1Japanese) 10915
##
## ~speaker (Number of levels: 43)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept) 3.37 0.41 2.68 4.26 1.00 11063
## sd(vowelA) 2.33 0.36 1.70 3.10 1.00 13968
## sd(vowelU) 2.99 0.44 2.23 3.93 1.00 13094
## cor(Intercept,vowelA) -0.42 0.15 -0.67 -0.10 1.00 17244
## cor(Intercept,vowelU) -0.34 0.15 -0.61 -0.03 1.00 16272
## cor(vowelA,vowelU) 0.28 0.18 -0.09 0.60 1.00 10106
## Tail_ESS
## sd(Intercept) 18474
## sd(vowelA) 23274
## sd(vowelU) 22687
## cor(Intercept,vowelA) 23133
## cor(Intercept,vowelU) 24118
## cor(vowelA,vowelU) 18303
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 2.50 2.45 -2.01 7.78 1.00 6946 12702
## vowelA -5.80 4.03 -14.67 1.50 1.00 5240 10129
## vowelU -4.63 2.67 -9.83 0.64 1.00 8753 14591
## L1Japanese -2.20 2.03 -6.16 1.85 1.00 12534 18314
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 3.23 0.07 3.10 3.37 1.00 49223 31137
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Summary of model m3: L1 as the only fixed effect (no vowel terms).
# Used below to quantify the contribution of vowel via Bayes factors.
PC1.FPC1.R.m3.BAAP
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: fPC1 ~ L1 + (1 | speaker) + (1 + L1 | prompt)
## Data: dat.PC1.EN.R (Number of observations: 1321)
## Draws: 4 chains, each with iter = 12000; warmup = 2000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~prompt (Number of levels: 8)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept) 2.82 0.99 1.55 5.29 1.00 11605
## sd(L1Japanese) 5.05 1.65 2.88 9.16 1.00 11156
## cor(Intercept,L1Japanese) 0.29 0.30 -0.35 0.78 1.00 12671
## Tail_ESS
## sd(Intercept) 18571
## sd(L1Japanese) 18420
## cor(Intercept,L1Japanese) 19353
##
## ~speaker (Number of levels: 43)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 2.85 0.34 2.27 3.61 1.00 6225 11811
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.86 1.32 -3.52 1.76 1.00 6943 11668
## L1Japanese -2.14 2.11 -6.29 2.08 1.00 7262 12197
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 3.56 0.07 3.42 3.70 1.00 34942 28804
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Summary of model m4: vowel as the only fixed effect (no L1 terms).
# Used below to quantify the contribution of L1 via Bayes factors.
PC1.FPC1.R.m4.BAAP
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: fPC1 ~ vowel + (1 + vowel | speaker) + (1 | prompt)
## Data: dat.PC1.EN.R (Number of observations: 1321)
## Draws: 4 chains, each with iter = 12000; warmup = 2000; thin = 1;
## total post-warmup draws = 40000
##
## Group-Level Effects:
## ~prompt (Number of levels: 8)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.51 0.78 0.69 3.48 1.00 10468 11954
##
## ~speaker (Number of levels: 43)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept) 3.46 0.42 2.75 4.38 1.00 7433
## sd(vowelA) 4.85 0.57 3.85 6.10 1.00 5871
## sd(vowelU) 3.39 0.45 2.60 4.37 1.00 9626
## cor(Intercept,vowelA) -0.40 0.13 -0.64 -0.12 1.00 5163
## cor(Intercept,vowelU) -0.42 0.14 -0.66 -0.12 1.00 9245
## cor(vowelA,vowelU) 0.52 0.13 0.24 0.73 1.00 9303
## Tail_ESS
## sd(Intercept) 13430
## sd(vowelA) 11617
## sd(vowelU) 16583
## cor(Intercept,vowelA) 10699
## cor(Intercept,vowelU) 16581
## cor(vowelA,vowelU) 16793
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 2.61 1.12 0.42 4.79 1.00 8414 13685
## vowelA -9.15 1.63 -12.28 -6.06 1.00 8214 12966
## vowelU -6.02 1.65 -9.25 -2.78 1.00 12000 14690
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 3.26 0.07 3.13 3.40 1.00 47049 28256
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Model comparison using Bayes Factor: Interaction
# m2 (no interaction) is compared against m1 (with interaction); BFs are
# computed from bridge-sampled marginal likelihoods.
comparison.PC1.FPC1.R.m1.m2.BAAP <- bayestestR::bayesfactor_models(PC1.FPC1.R.m1.BAAP, PC1.FPC1.R.m2.BAAP, denominator = PC1.FPC1.R.m1.BAAP) # denominator: the model against which comparison is performed
comparison.PC1.FPC1.R.m1.m2.BAAP
## Bayes Factors for Model Comparison
##
## Model BF
## [2] vowel + L1 + (1 + vowel | speaker) + (1 + L1 | prompt) 0.802
##
## * Against Denominator: [1] vowel + L1 + L1:vowel + (1 + vowel | speaker) + (1 + L1 | prompt)
## * Bayes Factor Type: marginal likelihoods (bridgesampling)
# BF < 1 favours the denominator (m1, with interaction), though 0.802 on
# its own is only weak evidence.
# plot
plot(comparison.PC1.FPC1.R.m1.m2.BAAP, n_pies = "one", value = "BF")
# Model comparison using Bayes Factor: Fixed effects
# m3 (L1 only) and m4 (vowel only) are each compared against m2 (both
# fixed effects); the very small BFs strongly favour keeping both predictors.
comparison.fixed.effect.PC1.FPC1.R.BAAP <- bayestestR::bayesfactor_models(PC1.FPC1.R.m2.BAAP, PC1.FPC1.R.m3.BAAP, PC1.FPC1.R.m4.BAAP, denominator = PC1.FPC1.R.m2.BAAP)
comparison.fixed.effect.PC1.FPC1.R.BAAP
## Bayes Factors for Model Comparison
##
## Model BF
## [2] L1 + (1 | speaker) + (1 + L1 | prompt) 3.79e-22
## [3] vowel + (1 + vowel | speaker) + (1 | prompt) 1.40e-09
##
## * Against Denominator: [1] vowel + L1 + (1 + vowel | speaker) + (1 + L1 | prompt)
## * Bayes Factor Type: marginal likelihoods (bridgesampling)
# plot
plot(comparison.fixed.effect.PC1.FPC1.R.BAAP, n_pies = "one", value = "BF")
# save fitted models to disk so later analyses can load them without refitting
save(PC1.FPC1.R.m1.BAAP, file = "model/PC1.FPC1.R.m1.BAAP.rda")
save(PC1.FPC1.R.m2.BAAP, file = "model/PC1.FPC1.R.m2.BAAP.rda")
save(PC1.FPC1.R.m3.BAAP, file = "model/PC1.FPC1.R.m3.BAAP.rda")
save(PC1.FPC1.R.m4.BAAP, file = "model/PC1.FPC1.R.m4.BAAP.rda")
# obtain posterior draws (sampled values) of the expected fPC1 for each
# L1 x vowel cell from the winning model m1
load(file = "model/PC1.FPC1.R.m1.BAAP.rda")
post_data_R_PC1_FPC1_BAAP <- PC1.FPC1.R.m1.BAAP %>%
emmeans::emmeans(~ L1*vowel, epred = TRUE) %>%
tidybayes::gather_emmeans_draws()
# Shared plot-label recoding for the vowel and L1 columns, applied to both
# the HDI summary and the raw draws (previously duplicated verbatim).
relabel_vowel_l1 <- function(df) {
df %>%
mutate(
vowel = case_when(
vowel == "A" ~ "/a/",
vowel == "I" ~ "/i/",
vowel == "U" ~ "/u/"
),
L1 = case_when(
L1 == "English" ~ "L1 English",
L1 == "Japanese" ~ "L1 Japanese"
))
}
# obtain highest density interval (median + 95% HDI per cell), relabelled
post_data_R_PC1_FPC1_BAAP_hdi <- post_data_R_PC1_FPC1_BAAP %>%
tidybayes::median_hdi() %>%
relabel_vowel_l1()
# relabel the raw draws to match the HDI summary
post_data_R_PC1_FPC1_BAAP <- post_data_R_PC1_FPC1_BAAP %>%
relabel_vowel_l1()
# plotting posterior distribution: median + 95% HDI per vowel (points and
# error bars) over the cloud of raw posterior draws, faceted by L1 group.
# NOTE(review): "/Éą/" in the title looks like mojibake for "/ɹ/" — confirm
# the file encoding before changing the string.
post_data_R_PC1_FPC1_BAAP_plot <- ggplot() +
geom_point(data = post_data_R_PC1_FPC1_BAAP_hdi, aes(x = vowel, y = .value, colour = vowel, shape = vowel), size = 6, stroke = 2, show.legend = FALSE) +
# `linewidth` replaces the deprecated `size` aesthetic for line-based geoms
# (ggplot2 >= 3.4; this session uses 3.5.0); the `y` aesthetic is dropped
# because geom_errorbar only uses x/ymin/ymax and warns on unknown aesthetics
geom_errorbar(data = post_data_R_PC1_FPC1_BAAP_hdi, aes(x = vowel, ymin = .lower, ymax = .upper), linewidth = 2, width = 0.8, show.legend = FALSE) +
ggbeeswarm::geom_quasirandom(data = post_data_R_PC1_FPC1_BAAP, aes(x = vowel, y = .value, colour = vowel), alpha = 0.008, size = 0.01, width = 0.3, dodge.width = 0.3, show.legend = FALSE) +
# dashed reference line at FPC1 = 0
geom_hline(yintercept = 0, linewidth = 0.5, linetype = 2) +
facet_wrap(~ factor(L1, levels = c("L1 English", "L1 Japanese"))) +
scale_colour_manual(values = c("blue4", "brown4", "darkolivegreen")) +
scale_y_continuous(limits = c(-20, 20)) +
theme_classic() +
theme(legend.text = element_text(size = 30),
legend.key.size = unit(2, 'cm'),
legend.position = "bottom",
legend.title = element_text(size = 30),
axis.text.x = element_text(size = 30),
# axis.title.x = element_text(size = 20),
axis.title.x = element_blank(),
axis.title.y = element_text(size = 20),
plot.title = element_text(size = 30, hjust = 0, face = "bold"),
strip.text.x = element_text(size = 30),
strip.text.y = element_text(angle = 0, size = 30)
) +
labs(x = "Vowel", y = "FPC1") +
labs(title = "/Éą/: FPC1 for PC1")
post_data_R_PC1_FPC1_BAAP_plot
# calculate proportion of posterior distribution away from zero
# (probability of direction, pd): the share of posterior draws on the
# expected side of zero for each L1 x vowel cell.
pd_away_from_zero <- function(draws, l1, vowel, direction = c("positive", "negative")) {
# draws: data frame with .value, L1, vowel columns; l1/vowel select a cell;
# direction: side of zero that counts as "away" for that cell
direction <- match.arg(direction)
vals <- draws$.value[draws$L1 == l1 & draws$vowel == vowel]
if (direction == "positive") {
sum(vals > 0, na.rm = TRUE) / length(vals)
} else {
sum(vals < 0, na.rm = TRUE) / length(vals)
}
}
EngI_R_PC1_FPC1_BAAP_pd <- pd_away_from_zero(post_data_R_PC1_FPC1_BAAP, "L1 English", "/i/", "positive") # EngI_R: positive
EngA_R_PC1_FPC1_BAAP_pd <- pd_away_from_zero(post_data_R_PC1_FPC1_BAAP, "L1 English", "/a/", "negative") # EngA_R: negative
EngU_R_PC1_FPC1_BAAP_pd <- pd_away_from_zero(post_data_R_PC1_FPC1_BAAP, "L1 English", "/u/", "negative") # EngU_R: negative
JapI_R_PC1_FPC1_BAAP_pd <- pd_away_from_zero(post_data_R_PC1_FPC1_BAAP, "L1 Japanese", "/i/", "positive") # JapI_R: positive
JapA_R_PC1_FPC1_BAAP_pd <- pd_away_from_zero(post_data_R_PC1_FPC1_BAAP, "L1 Japanese", "/a/", "negative") # JapA_R: negative
JapU_R_PC1_FPC1_BAAP_pd <- pd_away_from_zero(post_data_R_PC1_FPC1_BAAP, "L1 Japanese", "/u/", "negative") # JapU_R: negative
# probability data merged together with the per-cell HDI summary
R_PC1_FPC1_BAAP_pd <- c(EngI_R_PC1_FPC1_BAAP_pd, EngA_R_PC1_FPC1_BAAP_pd, EngU_R_PC1_FPC1_BAAP_pd, JapI_R_PC1_FPC1_BAAP_pd, JapA_R_PC1_FPC1_BAAP_pd, JapU_R_PC1_FPC1_BAAP_pd)
R_PC1_FPC1_pd_BAAP_hdi <- data.frame(post_data_R_PC1_FPC1_BAAP_hdi, R_PC1_FPC1_BAAP_pd)
R_PC1_FPC1_pd_BAAP_hdi
## L1 vowel .value .lower .upper .width .point .interval
## 1 L1 English /i/ 1.240521 -1.9527241 4.3356258 0.95 median hdi
## 2 L1 English /a/ -2.008996 -5.1066147 1.0676416 0.95 median hdi
## 3 L1 English /u/ -2.421176 -6.0895511 1.2920743 0.95 median hdi
## 4 L1 Japanese /i/ 3.301085 0.5989382 5.9901645 0.95 median hdi
## 5 L1 Japanese /a/ -8.797508 -11.5136751 -6.1293067 0.95 median hdi
## 6 L1 Japanese /u/ -3.888072 -7.0499718 -0.6145064 0.95 median hdi
## R_PC1_FPC1_BAAP_pd
## 1 0.801025
## 2 0.911975
## 3 0.914425
## 4 0.987000
## 5 0.999600
## 6 0.986250
# quantifying contrasts
# (the stray bare token `emmeans` that preceded this comment evaluated and
# auto-printed the attached function; removed as an obvious typo)
## get the adjusted means: estimated marginal means of fPC1 per vowel,
## within each L1 group
PC1.FPC1.R.m1.BAAP.em <- emmeans::emmeans(PC1.FPC1.R.m1.BAAP, ~ vowel|L1)
PC1.FPC1.R.m1.BAAP.em
## L1 = English:
## vowel emmean lower.HPD upper.HPD
## I 1.24 -1.966 4.320
## A -2.01 -5.107 1.068
## U -2.42 -6.090 1.292
##
## L1 = Japanese:
## vowel emmean lower.HPD upper.HPD
## I 3.30 0.556 5.940
## A -8.80 -11.514 -6.129
## U -3.89 -7.051 -0.619
##
## Point estimate displayed: median
## HPD interval probability: 0.95
## get all possible pairwise vowel contrasts within each L1 group
PC1.FPC1.R.m1.BAAP.cont <- emmeans::contrast(PC1.FPC1.R.m1.BAAP.em, "tukey")
PC1.FPC1.R.m1.BAAP.cont
## L1 = English:
## contrast estimate lower.HPD upper.HPD
## I - A 3.245 -0.696 7.11
## I - U 3.646 -0.794 7.97
## A - U 0.411 -4.061 4.78
##
## L1 = Japanese:
## contrast estimate lower.HPD upper.HPD
## I - A 12.088 8.484 15.63
## I - U 7.184 3.147 11.16
## A - U -4.898 -8.793 -0.83
##
## Point estimate displayed: median
## HPD interval probability: 0.95
## get the posterior draws from the contrasts and relabel L1 for plotting
PC1_FPC1_R_cont_BAAP_posterior <- tidybayes::gather_emmeans_draws(PC1.FPC1.R.m1.BAAP.cont) %>%
mutate(
L1 = case_when(
L1 == "English" ~ "L1 English",
L1 == "Japanese" ~ "L1 Japanese"
)
)
# calculating probability of direction for each vowel contrast: the share
# of contrast draws on the expected side of zero
contrast_pd <- function(draws, l1, contrast, direction = c("positive", "negative")) {
# draws: data frame with .value, L1, contrast columns; l1/contrast select
# the cell; direction: side of zero that counts as "away"
direction <- match.arg(direction)
vals <- draws$.value[draws$L1 == l1 & draws$contrast == contrast]
if (direction == "positive") {
sum(vals > 0, na.rm = TRUE) / length(vals)
} else {
sum(vals < 0, na.rm = TRUE) / length(vals)
}
}
## L1 English
EngIA_R_PC1_FPC1_BAAP_pd <- contrast_pd(PC1_FPC1_R_cont_BAAP_posterior, "L1 English", "I - A", "positive") # EngIA_R: positive
EngIU_R_PC1_FPC1_BAAP_pd <- contrast_pd(PC1_FPC1_R_cont_BAAP_posterior, "L1 English", "I - U", "positive") # EngIU_R: positive
EngAU_R_PC1_FPC1_BAAP_pd <- contrast_pd(PC1_FPC1_R_cont_BAAP_posterior, "L1 English", "A - U", "positive") # EngAU_R: positive
## L1 Japanese
JapIA_R_PC1_FPC1_BAAP_pd <- contrast_pd(PC1_FPC1_R_cont_BAAP_posterior, "L1 Japanese", "I - A", "positive") # JapIA_R: positive
JapIU_R_PC1_FPC1_BAAP_pd <- contrast_pd(PC1_FPC1_R_cont_BAAP_posterior, "L1 Japanese", "I - U", "positive") # JapIU_R: positive
JapAU_R_PC1_FPC1_BAAP_pd <- contrast_pd(PC1_FPC1_R_cont_BAAP_posterior, "L1 Japanese", "A - U", "negative") # JapAU_R: negative
### contrast probability of direction merged together
R_PC1_FPC1_contrast_BAAP <- c(EngIA_R_PC1_FPC1_BAAP_pd, EngIU_R_PC1_FPC1_BAAP_pd, EngAU_R_PC1_FPC1_BAAP_pd, JapIA_R_PC1_FPC1_BAAP_pd, JapIU_R_PC1_FPC1_BAAP_pd, JapAU_R_PC1_FPC1_BAAP_pd)
### merge contrast pd with hdi/median
R_PC1_FPC1_contrast_BAAP <- data.frame(PC1.FPC1.R.m1.BAAP.cont, R_PC1_FPC1_contrast_BAAP) %>% rename(
PD = R_PC1_FPC1_contrast_BAAP
)
### results (rounded, ordered by L1 then contrast)
R_PC1_FPC1_contrast_BAAP %>%
mutate(across(where(is.numeric), ~ round(., digits = 2))) %>%
mutate(
contrast = factor(contrast, levels = c("A - U", "I - U", "I - A"))
) %>%
arrange(L1, contrast)
## contrast L1 estimate lower.HPD upper.HPD PD
## 1 A - U English 0.41 -4.06 4.78 0.59
## 2 I - U English 3.65 -0.79 7.97 0.96
## 3 I - A English 3.25 -0.70 7.11 0.96
## 4 A - U Japanese -4.90 -8.79 -0.83 0.99
## 5 I - U Japanese 7.18 3.15 11.16 1.00
## 6 I - A Japanese 12.09 8.48 15.63 1.00
## plot the posterior distributions of the contrasts, with slabs shaded by
## HDI level (89/95/99%) and point + interval markers, faceted by L1.
## NOTE(review): "/Éą/" in the title looks like mojibake for "/ɹ/" — confirm
## the file encoding before changing the string.
PC1_FPC1_R_cont_posterior_BAAP_plot <- PC1_FPC1_R_cont_BAAP_posterior %>%
ggplot(aes(y = contrast, x = .value)) +
# tidybayes::stat_halfeye(point_interval = "median_hdi", aes(fill = after_stat(level)), .width = c(.66, .95, .99)) +
# slab_alpha is mapped inside aes() so it can be overridden in guides() below
tidybayes::stat_slab(aes(fill = after_stat(level), slab_alpha = 0.4), point_interval = "median_hdi", .width = c(.89, .95, .99)) +
tidybayes::stat_pointinterval(point_interval = "median_hdi", .width = c(.89, .95, .99)) +
# scale_fill_brewer(na.translate = FALSE) +
scale_fill_manual(values = c("darkolivegreen", "blue4", "brown4"), na.translate = FALSE) +
scale_x_continuous(limits = c(-20, 20)) +
facet_wrap(~ L1) +
# dashed reference line at zero difference
geom_vline(xintercept = 0, lty = 2) +
theme_classic() +
guides(fill = guide_legend(override.aes = list(alpha = 0.4), title = "HDI")) +
labs(x = "difference", title = "/Éą/: PC1/FPC1 difference") +
theme(legend.text = element_text(size = 30),
# legend.key.size = unit(2, 'cm'),
legend.position = "bottom",
legend.title = element_text(size = 30),
axis.text = element_text(size = 15),
axis.text.y = element_text(size = 25),
# axis.title = element_text(size = 20),
axis.title = element_blank(),
plot.title = element_text(size = 30, hjust = 0, face = "bold"),
strip.text.x = element_text(size = 30),
strip.text.y = element_text(angle = 0, size = 30))
PC1_FPC1_R_cont_posterior_BAAP_plot
# stack the cell-means plot above the contrasts plot and save to disk
R.BAAP <- ggarrange(post_data_R_PC1_FPC1_BAAP_plot, PC1_FPC1_R_cont_posterior_BAAP_plot, common.legend = FALSE, legend = "bottom", ncol = 1)
R.BAAP
## saving plot
ggsave(R.BAAP, filename = "figure/PC1_FPC1_R.png", width = 8, height = 10, dpi = 1000)
# Record the session (R version, platform, attached packages) for
# reproducibility of the analysis above.
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Monterey 12.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/London
## tzcode source: internal
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] emmeans_1.8.9 emuR_2.4.2 ggsci_3.0.0 ggpubr_0.6.0
## [5] gridExtra_2.3 scales_1.3.0 brms_2.20.4 Rcpp_1.0.11
## [9] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [13] purrr_1.0.2 readr_2.1.4 tidyr_1.3.0 tibble_3.2.1
## [17] tidyverse_2.0.0 ggplot2_3.5.0 fdapace_0.5.9
##
## loaded via a namespace (and not attached):
## [1] tensorA_0.36.2.1 rstudioapi_0.15.0 jsonlite_1.8.8
## [4] magrittr_2.0.3 ggbeeswarm_0.7.2 estimability_1.4.1
## [7] farver_2.1.1 rmarkdown_2.25 ragg_1.2.6
## [10] vctrs_0.6.5 base64enc_0.1-3 rstatix_0.7.2
## [13] htmltools_0.5.7 distributional_0.4.0 curl_5.1.0
## [16] tidybayes_3.0.6 broom_1.0.5 Formula_1.2-5
## [19] sass_0.4.8 StanHeaders_2.26.28 pracma_2.4.4
## [22] bslib_0.6.1 htmlwidgets_1.6.4 plyr_1.8.9
## [25] zoo_1.8-12 cachem_1.0.8 uuid_1.1-1
## [28] igraph_1.5.1 mime_0.12 lifecycle_1.0.4
## [31] pkgconfig_2.0.3 colourpicker_1.3.0 Matrix_1.6-1.1
## [34] R6_2.5.1 fastmap_1.1.1 shiny_1.8.0
## [37] digest_0.6.34 numDeriv_2016.8-1.1 colorspace_2.1-0
## [40] ps_1.7.5 textshaping_0.3.7 crosstalk_1.2.0
## [43] Hmisc_5.1-1 labeling_0.4.3 fansi_1.0.6
## [46] timechange_0.2.0 mgcv_1.9-0 abind_1.4-5
## [49] compiler_4.3.2 withr_3.0.0 htmlTable_2.4.2
## [52] backports_1.4.1 inline_0.3.19 shinystan_2.6.0
## [55] carData_3.0-5 DBI_1.1.3 highr_0.10
## [58] QuickJSR_1.0.8 pkgbuild_1.4.2 ggsignif_0.6.4
## [61] MASS_7.3-60 gtools_3.9.4 loo_2.7.0
## [64] tools_4.3.2 vipor_0.4.5 foreign_0.8-85
## [67] beeswarm_0.4.0 httpuv_1.6.13 threejs_0.3.3
## [70] nnet_7.3-19 glue_1.7.0 callr_3.7.3
## [73] nlme_3.1-163 promises_1.2.1 checkmate_2.3.1
## [76] cluster_2.1.4 reshape2_1.4.4 see_0.8.1
## [79] generics_0.1.3 gtable_0.3.4 tzdb_0.4.0
## [82] data.table_1.14.8 hms_1.1.3 car_3.1-2
## [85] utf8_1.2.4 ggdist_3.3.0 pillar_1.9.0
## [88] markdown_1.11 posterior_1.5.0 later_1.3.2
## [91] splines_4.3.2 lattice_0.21-9 tidyselect_1.2.0
## [94] miniUI_0.1.1.1 knitr_1.45 arrayhelpers_1.1-0
## [97] V8_4.4.1 stats4_4.3.2 xfun_0.41
## [100] bridgesampling_1.1-2 matrixStats_1.2.0 DT_0.30
## [103] rstan_2.32.3 stringi_1.8.2 yaml_2.3.7
## [106] evaluate_0.23 codetools_0.2-19 cli_3.6.2
## [109] RcppParallel_5.1.7 rpart_4.1.21 systemfonts_1.0.5
## [112] shinythemes_1.2.0 xtable_1.8-4 munsell_0.5.0
## [115] processx_3.8.2 jquerylib_0.1.4 coda_0.19-4
## [118] svUnit_1.0.6 wrassp_1.0.4 parallel_4.3.2
## [121] rstantools_2.3.1.1 ellipsis_0.3.2 prettyunits_1.2.0
## [124] bayestestR_0.13.1 dygraphs_1.1.1.6 bayesplot_1.10.0
## [127] Brobdingnag_1.2-9 mvtnorm_1.2-4 xts_0.13.1
## [130] insight_0.19.6 crayon_1.5.2 rlang_1.1.3
## [133] cowplot_1.1.1 shinyjs_2.1.0